Encoder: SVC encoding support added

Added support for encoding 'Scalable Baseline' profile, corresponding to
profile_idc of 83 in 'Rec. ITU-T H.264 (11/2007)'.

Bug: 248891908
Test: svcenc -c enc.cfg
Change-Id: Ib12ca4c4a8c0e674738ae2af01558a08cefe0929
This commit is contained in:
Ashwin Natesan 2023-01-30 14:16:47 +05:30 committed by Harish Mahendrakar
parent da77ac1a5f
commit bb0f31cb6b
132 changed files with 85450 additions and 154 deletions

View file

@ -35,6 +35,17 @@ cc_library_headers {
min_sdk_version: "29",
}
// Header-only module for the SVC encoder: exports the common and encoder
// include directories together with their svc/ subdirectories.
cc_library_headers {
name: "libsvcenc_headers",
export_include_dirs: [
"common",
"common/svc",
"encoder",
"encoder/svc"
],
min_sdk_version: "29",
}
cc_library_headers {
name: "libavcenc_headers",
export_include_dirs: [
@ -44,6 +55,106 @@ cc_library_headers {
min_sdk_version: "29",
}
// Build settings shared by the encoder libraries that list
// "libavc_enc_defaults" in their `defaults`: common compiler flags,
// per-architecture defines and include directories, sanitizer configuration
// and APEX availability.
cc_defaults {
name: "libavc_enc_defaults",
vendor_available: true,
host_supported: true,
shared_libs: [
"liblog",
"libcutils",
],
cflags: [
"-DNDEBUG",
"-UHP_PL",
"-DN_MB_ENABLE",
"-fPIC",
"-O3",
"-Wall",
"-Werror",
// Keep constant-conversion diagnostics as warnings despite -Werror
"-Wno-error=constant-conversion",
],
// Per-architecture defines and private include directories
arch: {
arm: {
local_include_dirs: [
"common/arm",
"encoder/arm",
],
cflags: [
"-DARM",
// These will be overridden by armv7_a_neon
"-DDISABLE_NEON",
],
neon: {
// Undo the -DDISABLE_NEON set above when NEON is available
cflags: [
"-UDISABLE_NEON",
],
},
},
arm64: {
cflags: [
"-DARMV8",
"-DARM",
],
local_include_dirs: [
"common/arm",
"common/armv8",
"encoder/arm",
"encoder/armv8",
],
},
riscv64: {
local_include_dirs: [
"common/riscv",
"encoder/riscv",
],
},
// x86 and x86_64 use identical flags and include directories
x86: {
cflags: [
"-DX86",
"-msse4.2",
],
local_include_dirs: [
"encoder/x86",
"common/x86",
],
},
x86_64: {
cflags: [
"-DX86",
"-msse4.2",
],
local_include_dirs: [
"encoder/x86",
"common/x86",
],
},
},
sanitize: {
integer_overflow: true,
misc_undefined: ["bounds"],
cfi: true,
config: {
cfi_assembly_support: true,
},
blocklist: "libavc_blocklist.txt",
},
apex_available: [
"//apex_available:platform", //due to libstagefright_soft_avcenc
"com.android.media.swcodec",
],
min_sdk_version: "29",
}
cc_defaults {
name: "libavc_mvc_dec_defaults",
cflags: [
@ -349,24 +460,7 @@ cc_library_static {
cc_library_static {
name: "libavcenc",
vendor_available: true,
host_supported: true,
shared_libs: [
"liblog",
"libcutils",
],
cflags: [
"-DNDEBUG",
"-UHP_PL",
"-DN_MB_ENABLE",
"-fPIC",
"-O3",
"-Wall",
"-Werror",
"-Wno-error=constant-conversion",
],
defaults: ["libavc_enc_defaults"],
export_include_dirs: [
"common",
@ -435,23 +529,11 @@ cc_library_static {
arch: {
arm: {
local_include_dirs: [
"encoder/arm",
"common/arm",
],
srcs: [
"encoder/arm/ih264e_function_selector.c",
"common/arm/ih264_arm_memory_barrier.s",
],
cflags: [
"-DARM",
// This will be overriden by armv7_a_neon
"-DDISABLE_NEON",
],
neon: {
srcs: [
"encoder/arm/ih264e_function_selector_a9q.c",
@ -479,25 +561,10 @@ cc_library_static {
"encoder/arm/ih264e_fmt_conv.s",
"encoder/arm/ime_distortion_metrics_a9q.s",
],
cflags: [
"-UDISABLE_NEON",
],
},
},
arm64: {
cflags: [
"-DARMV8",
"-DARM",
],
local_include_dirs: [
"encoder/arm",
"encoder/armv8",
"common/armv8",
],
srcs: [
"encoder/arm/ih264e_function_selector.c",
"encoder/arm/ih264e_function_selector_av8.c",
@ -525,27 +592,12 @@ cc_library_static {
},
riscv64: {
local_include_dirs: [
"common/riscv",
"encoder/riscv",
],
srcs: [
"encoder/riscv/ih264e_function_selector.c",
],
},
x86: {
cflags: [
"-DX86",
"-msse4.2",
],
local_include_dirs: [
"encoder/x86",
"common/x86",
],
srcs: [
"encoder/x86/ih264e_function_selector.c",
"encoder/x86/ih264e_function_selector_sse42.c",
@ -571,16 +623,6 @@ cc_library_static {
},
x86_64: {
cflags: [
"-DX86",
"-msse4.2",
],
local_include_dirs: [
"encoder/x86",
"common/x86",
],
srcs: [
"encoder/x86/ih264e_function_selector.c",
"encoder/x86/ih264e_function_selector_sse42.c",
@ -605,21 +647,161 @@ cc_library_static {
],
},
},
}
sanitize: {
integer_overflow: true,
misc_undefined: ["bounds"],
cfi: true,
config: {
cfi_assembly_support: true,
},
blocklist: "libavc_blocklist.txt",
},
apex_available: [
"//apex_available:platform", //due to libstagefright_soft_avcenc
"com.android.media.swcodec",
// Static library for the SVC encoder. Flags, per-architecture include
// directories, sanitizer and APEX settings come from libavc_enc_defaults;
// libavcenc is bundled in through whole_static_libs.
cc_library_static {
name: "libsvcenc",
defaults: ["libavc_enc_defaults"],
whole_static_libs: [
"libavcenc",
],
min_sdk_version: "29",
export_include_dirs: [
"common",
"common/svc",
"encoder",
"encoder/svc",
],
// Architecture-independent sources
srcs: [
"common/svc/isvc_cabac_tables.c",
"common/svc/isvc_common_tables.c",
"common/svc/isvc_intra_resample.c",
"common/svc/isvc_iquant_itrans_recon.c",
"common/svc/isvc_mem_fns.c",
"common/svc/isvc_resi_trans_quant.c",
"encoder/svc/irc_svc_rate_control_api.c",
"encoder/svc/isvce_api.c",
"encoder/svc/isvce_cabac.c",
"encoder/svc/isvce_cabac_encode.c",
"encoder/svc/isvce_cabac_init.c",
"encoder/svc/isvce_cavlc.c",
"encoder/svc/isvce_core_coding.c",
"encoder/svc/isvce_deblk.c",
"encoder/svc/isvce_downscaler.c",
"encoder/svc/isvce_encode.c",
"encoder/svc/isvce_encode_header.c",
"encoder/svc/isvce_fmt_conv.c",
"encoder/svc/isvce_function_selector_generic.c",
"encoder/svc/isvce_globals.c",
"encoder/svc/isvce_ibl_eval.c",
"encoder/svc/isvce_ilp_mv.c",
"encoder/svc/isvce_intra_modes_eval.c",
"encoder/svc/isvce_mc.c",
"encoder/svc/isvce_me.c",
"encoder/svc/isvce_mode_stat_visualiser.c",
"encoder/svc/isvce_nalu_stat_aggregator.c",
"encoder/svc/isvce_process.c",
"encoder/svc/isvce_rate_control.c",
"encoder/svc/isvce_rc_mem_interface.c",
"encoder/svc/isvce_rc_utils.c",
"encoder/svc/isvce_residual_pred.c",
"encoder/svc/isvce_sub_pic_rc.c",
"encoder/svc/isvce_utils.c",
],
// Architecture-specific function selectors and SIMD kernels
arch: {
arm: {
local_include_dirs: [
"common/arm/svc",
"encoder/arm/svc",
],
srcs: [
"encoder/arm/svc/isvce_function_selector.c",
],
// NEON kernels are only compiled for 32-bit ARM when NEON is enabled
neon: {
srcs: [
"encoder/arm/svc/isvce_function_selector_a9q.c",
"common/arm/svc/isvc_intra_sampling_neon.c",
"common/arm/svc/isvc_iquant_itrans_recon_neon.c",
"common/arm/svc/isvc_mem_fns_neon.c",
"common/arm/svc/isvc_resi_trans_quant_neon.c",
"encoder/arm/svc/isvce_downscaler_neon.c",
"encoder/arm/svc/isvce_rc_utils_neon.c",
"encoder/arm/svc/isvce_residual_pred_neon.c",
],
},
},
// arm64 builds the same NEON kernel sources unconditionally
arm64: {
local_include_dirs: [
"common/arm/svc",
"encoder/arm/svc",
],
srcs: [
"encoder/arm/svc/isvce_function_selector.c",
"encoder/arm/svc/isvce_function_selector_av8.c",
"common/arm/svc/isvc_intra_sampling_neon.c",
"common/arm/svc/isvc_iquant_itrans_recon_neon.c",
"common/arm/svc/isvc_mem_fns_neon.c",
"common/arm/svc/isvc_resi_trans_quant_neon.c",
"encoder/arm/svc/isvce_downscaler_neon.c",
"encoder/arm/svc/isvce_rc_utils_neon.c",
"encoder/arm/svc/isvce_residual_pred_neon.c",
],
},
riscv64: {
local_include_dirs: [
"encoder/riscv/svc",
],
srcs: [
"encoder/riscv/svc/isvce_function_selector.c",
],
},
// x86 and x86_64 share the same include directories and source list
x86: {
local_include_dirs: [
"encoder/x86/svc",
"common/x86/svc",
],
srcs: [
"common/x86/svc/isvc_intra_resample_sse42.c",
"common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c",
"common/x86/svc/isvc_iquant_itrans_recon_sse42.c",
"common/x86/svc/isvc_iquant_itrans_recon_ssse3.c",
"common/x86/svc/isvc_mem_fns_sse42.c",
"common/x86/svc/isvc_mem_fns_ssse3.c",
"common/x86/svc/isvc_padding_ssse3.c",
"common/x86/svc/isvc_resi_trans_quant_sse42.c",
"encoder/x86/svc/isvce_downscaler_sse42.c",
"encoder/x86/svc/isvce_function_selector.c",
"encoder/x86/svc/isvce_function_selector_sse42.c",
"encoder/x86/svc/isvce_function_selector_ssse3.c",
"encoder/x86/svc/isvce_rc_utils_sse42.c",
"encoder/x86/svc/isvce_residual_pred_sse42.c",
],
},
x86_64: {
local_include_dirs: [
"encoder/x86/svc",
"common/x86/svc",
],
srcs: [
"common/x86/svc/isvc_intra_resample_sse42.c",
"common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c",
"common/x86/svc/isvc_iquant_itrans_recon_sse42.c",
"common/x86/svc/isvc_iquant_itrans_recon_ssse3.c",
"common/x86/svc/isvc_mem_fns_sse42.c",
"common/x86/svc/isvc_mem_fns_ssse3.c",
"common/x86/svc/isvc_padding_ssse3.c",
"common/x86/svc/isvc_resi_trans_quant_sse42.c",
"encoder/x86/svc/isvce_downscaler_sse42.c",
"encoder/x86/svc/isvce_function_selector.c",
"encoder/x86/svc/isvce_function_selector_sse42.c",
"encoder/x86/svc/isvce_function_selector_ssse3.c",
"encoder/x86/svc/isvce_rc_utils_sse42.c",
"encoder/x86/svc/isvce_residual_pred_sse42.c",
],
},
},
}
subdirs = ["test"]

View file

@ -4,6 +4,8 @@ enable_language(ASM)
set(AVC_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
set(AVC_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
# ENABLE_MVC gates the MVC decoder library, its test application and its fuzzer.
option(ENABLE_MVC "Enables mvcdec builds" OFF)
option(ENABLE_SVC "Enables svcenc and svcdec builds" OFF)
if("${AVC_ROOT}" STREQUAL "${AVC_CONFIG_DIR}")
message(
@ -36,13 +38,29 @@ libavc_set_link_libraries()
include("${AVC_ROOT}/common/common.cmake")
include("${AVC_ROOT}/decoder/libavcdec.cmake")
include("${AVC_ROOT}/decoder/mvc/libmvcdec.cmake")
if (${ENABLE_MVC})
include("${AVC_ROOT}/decoder/mvc/libmvcdec.cmake")
endif()
include("${AVC_ROOT}/encoder/libavcenc.cmake")
if (${ENABLE_SVC})
include("${AVC_ROOT}/common/svccommon.cmake")
include("${AVC_ROOT}/encoder/svc/libsvcenc.cmake")
endif()
include("${AVC_ROOT}/test/decoder/avcdec.cmake")
include("${AVC_ROOT}/test/mvcdec/mvcdec.cmake")
if (${ENABLE_MVC})
include("${AVC_ROOT}/test/mvcdec/mvcdec.cmake")
endif()
include("${AVC_ROOT}/test/encoder/avcenc.cmake")
if (${ENABLE_SVC})
include("${AVC_ROOT}/test/svcenc/svcenc.cmake")
endif()
include("${AVC_ROOT}/fuzzer/avc_dec_fuzzer.cmake")
include("${AVC_ROOT}/fuzzer/mvc_dec_fuzzer.cmake")
if (${ENABLE_MVC})
include("${AVC_ROOT}/fuzzer/mvc_dec_fuzzer.cmake")
endif()
include("${AVC_ROOT}/fuzzer/avc_enc_fuzzer.cmake")
if (${ENABLE_SVC})
include("${AVC_ROOT}/fuzzer/svc_enc_fuzzer.cmake")
endif()

View file

@ -0,0 +1,485 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
* *******************************************************************************
* * @file
* isvc_intra_sampling_neon.c
*
* @brief
* neon variants of intra sampling functions used by IBL mode
*
* *******************************************************************************
*/
#include <arm_neon.h>
#include <string.h>
#include "ih264_typedefs.h"
#include "isvc_intra_resample.h"
/**
* @brief
* Produces a 16x16 block of luma samples from a 12x12 neighbourhood of the
* reference block using a separable 4-tap filter with taps (-3, 28, 8, -1)
* and the same taps in mirrored order.
*
* @param[in] pu1_inp_buf
* Source samples. Rows are read DYADIC_REF_W_Y bytes apart; 12 rows are read
* and bytes 0..15 of each row are loaded, of which columns 0..11 contribute
* to the result.
*
* @param[out] pi2_tmp_filt_buf
* Scratch buffer that receives the vertically filtered samples: 16 rows of
* 12 values each, with a row stride of 12 elements.
*
* @param[out] pu1_out_buf
* Destination block; 16 rows of 16 bytes are written.
*
* @param[in] i4_out_stride
* Distance in bytes between consecutive destination rows.
*
* @remarks
* The vertical pass stores unnormalised 16-bit sums (taps sum to 32). The
* horizontal pass applies the taps again in 32-bit precision, adds 512 and
* shifts right by 10 with unsigned saturation before narrowing to 8 bits.
*/
void isvc_interpolate_base_luma_dyadic_neon(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
UWORD8 *pu1_out_buf, WORD32 i4_out_stride)
{
WORD32 i4_y;
WORD16 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
WORD32 i4_filt_stride, i4_src_stride;
UWORD8 *pu1_inp = pu1_inp_buf;
UWORD8 *pu1_out = pu1_out_buf;
WORD16 *pi2_tmp = pi2_tmp_filt_buf;
int16x4_t i4_rslt_vert_16x4_1, i4_rslt_vert_16x4_2;
uint8x8_t i4_samp_vert_8x8_0, i4_samp_vert_8x8_1, i4_samp_vert_8x8_2, i4_samp_vert_8x8_3;
int16x8_t i4_rslt_vert_16x8_0, i4_rslt_vert_16x8_2;
/* Working registers and tap values for the horizontal pass */
int32x4_t i4_rslt_horz_r0_1, i4_rslt_horz_r1_1, i4_rslt_horz_r0_2, i4_rslt_horz_r1_2;
uint16x4_t i4_rslt_horz_r0_1_tmp, i4_rslt_horz_r1_1_tmp, i4_rslt_horz_r0_2_tmp,
i4_rslt_horz_r1_2_tmp;
uint16x8_t rslt_16x8_t_1, rslt_16x8_t_2;
int16x4_t i4_samp_horz_16x4_0, i4_samp_horz_16x4_1, i4_samp_horz_16x4_2, i4_samp_horz_16x4_3,
i4_samp_horz_16x4_4;
int16x4_t i4_samp_horz_16x4_5, i4_samp_horz_16x4_6, i4_samp_horz_16x4_7, i4_samp_horz_16x4_8;
int16_t i4_coeff_c0 = -3;
int16_t i4_coeff_c1 = 28;
int16_t i4_coeff_c2 = 8;
int16_t i4_coeff_c3 = -1;
int32x4x2_t i4_rslt_horz_r0_tmp32, i4_rslt_horz_r1_tmp32;
/* Rounding offset for the final shift right by 10 */
int32x4_t const_512_32x4 = vdupq_n_s32(512);
/* Filter coefficient values for phase 4 (used by the vertical pass) */
i4_coeff_0 = -3;
i4_coeff_1 = 28;
i4_coeff_2 = 8;
i4_coeff_3 = -1;
i4_filt_stride = 12;
i4_src_stride = DYADIC_REF_W_Y;
/* Vertical interpolation */
{
/* First 8 columns (64 bits of input per row) */
i4_samp_vert_8x8_0 = vld1_u8((const UWORD8 *) pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_1 = vld1_u8((const UWORD8 *) pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_2 = vld1_u8((const UWORD8 *) pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
pu1_inp += i4_src_stride;
/* Output row 0: input rows 0..3 with the taps in mirrored order */
i4_rslt_vert_16x8_0 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_3);
i4_rslt_vert_16x8_0 = vmlaq_n_s16(
i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_2);
i4_rslt_vert_16x8_0 = vmlaq_n_s16(
i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_1);
i4_rslt_vert_16x8_0 = vmlaq_n_s16(
i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_0);
vst1q_s16(pi2_tmp, i4_rslt_vert_16x8_0);
pi2_tmp += i4_filt_stride;
/* Each iteration slides the 4-row window down by one input row and */
/* emits two output rows (i4_y and i4_y + 1) from the same window */
for(i4_y = 1; i4_y < 15; i4_y += 2)
{
i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1;
i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2;
i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3;
i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
/* Odd output row: taps in order c0..c3 */
i4_rslt_vert_16x8_0 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_0);
i4_rslt_vert_16x8_0 =
vmlaq_n_s16(i4_rslt_vert_16x8_0,
vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_1);
i4_rslt_vert_16x8_0 =
vmlaq_n_s16(i4_rslt_vert_16x8_0,
vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_2);
i4_rslt_vert_16x8_0 =
vmlaq_n_s16(i4_rslt_vert_16x8_0,
vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_3);
/* Following even output row: taps in mirrored order c3..c0 */
i4_rslt_vert_16x8_2 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_3);
i4_rslt_vert_16x8_2 =
vmlaq_n_s16(i4_rslt_vert_16x8_2,
vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_2);
i4_rslt_vert_16x8_2 =
vmlaq_n_s16(i4_rslt_vert_16x8_2,
vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_1);
i4_rslt_vert_16x8_2 =
vmlaq_n_s16(i4_rslt_vert_16x8_2,
vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_0);
vst1q_s16(pi2_tmp, (i4_rslt_vert_16x8_0));
pi2_tmp += i4_filt_stride;
vst1q_s16(pi2_tmp, (i4_rslt_vert_16x8_2));
pi2_tmp += i4_filt_stride;
pu1_inp += i4_src_stride;
}
/* y = 15, y_phase = 4 */
i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1;
i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2;
i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3;
i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
i4_rslt_vert_16x8_0 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_0);
i4_rslt_vert_16x8_0 = vmlaq_n_s16(
i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_1);
i4_rslt_vert_16x8_0 = vmlaq_n_s16(
i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_2);
i4_rslt_vert_16x8_0 = vmlaq_n_s16(
i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_3);
vst1q_s16(pi2_tmp, (i4_rslt_vert_16x8_0));
}
{
/* Remaining 4 columns (8..11): 8 bytes are loaded per row but only */
/* the low 4 are used and stored */
pu1_inp = pu1_inp_buf + 8;
pi2_tmp = pi2_tmp_filt_buf + 8;
i4_samp_vert_8x8_0 = vld1_u8((const UWORD8 *) pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_1 = vld1_u8((const UWORD8 *) pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_2 = vld1_u8((const UWORD8 *) pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
pu1_inp += i4_src_stride;
/* Output row 0: taps in mirrored order */
i4_rslt_vert_16x4_1 = vmul_n_s16(
vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_3);
i4_rslt_vert_16x4_1 = vmla_n_s16(
i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))),
i4_coeff_2);
i4_rslt_vert_16x4_1 = vmla_n_s16(
i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))),
i4_coeff_1);
i4_rslt_vert_16x4_1 = vmla_n_s16(
i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))),
i4_coeff_0);
vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_1));
pi2_tmp += i4_filt_stride;
/* Same two-rows-per-iteration scheme as for the first 8 columns */
for(i4_y = 1; i4_y < 15; i4_y += 2)
{
i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1;
i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2;
i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3;
i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
i4_rslt_vert_16x4_1 = vmul_n_s16(
vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_0);
i4_rslt_vert_16x4_1 = vmla_n_s16(
i4_rslt_vert_16x4_1,
vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))), i4_coeff_1);
i4_rslt_vert_16x4_1 = vmla_n_s16(
i4_rslt_vert_16x4_1,
vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))), i4_coeff_2);
i4_rslt_vert_16x4_1 = vmla_n_s16(
i4_rslt_vert_16x4_1,
vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))), i4_coeff_3);
i4_rslt_vert_16x4_2 = vmul_n_s16(
vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_3);
i4_rslt_vert_16x4_2 = vmla_n_s16(
i4_rslt_vert_16x4_2,
vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))), i4_coeff_2);
i4_rslt_vert_16x4_2 = vmla_n_s16(
i4_rslt_vert_16x4_2,
vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))), i4_coeff_1);
i4_rslt_vert_16x4_2 = vmla_n_s16(
i4_rslt_vert_16x4_2,
vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))), i4_coeff_0);
vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_1));
pi2_tmp += i4_filt_stride;
vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_2));
pi2_tmp += i4_filt_stride;
pu1_inp += i4_src_stride;
}
/* Last output row (15): taps in order c0..c3 */
i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1;
i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2;
i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3;
i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
i4_rslt_vert_16x4_1 = vmul_n_s16(
vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_0);
i4_rslt_vert_16x4_1 = vmla_n_s16(
i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))),
i4_coeff_1);
i4_rslt_vert_16x4_1 = vmla_n_s16(
i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))),
i4_coeff_2);
i4_rslt_vert_16x4_1 = vmla_n_s16(
i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))),
i4_coeff_3);
vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_1));
/* Reinitializing the ptrs */
pu1_inp = pu1_inp_buf;
pi2_tmp = pi2_tmp_filt_buf;
}
/* Horizontal interpolation: one output row of 16 samples per iteration */
for(i4_y = 0; i4_y < 16; i4_y++)
{
/* Nine overlapping 4-lane loads starting at columns 0..8 of the row */
i4_samp_horz_16x4_0 = vld1_s16(pi2_tmp);
i4_samp_horz_16x4_1 = vld1_s16(pi2_tmp + 1);
i4_samp_horz_16x4_2 = vld1_s16(pi2_tmp + 2);
i4_samp_horz_16x4_3 = vld1_s16(pi2_tmp + 3);
i4_samp_horz_16x4_4 = vld1_s16(pi2_tmp + 4);
i4_samp_horz_16x4_5 = vld1_s16(pi2_tmp + 5);
i4_samp_horz_16x4_6 = vld1_s16(pi2_tmp + 6);
i4_samp_horz_16x4_7 = vld1_s16(pi2_tmp + 7);
i4_samp_horz_16x4_8 = vld1_s16(pi2_tmp + 8);
i4_rslt_horz_r0_1 =
vmull_n_s16(i4_samp_horz_16x4_0, i4_coeff_c3); /* a0c3 a1c3 a2c3 a3c3 */
i4_rslt_horz_r0_1 =
vmlal_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x4_1,
i4_coeff_c2); /* a0c3+a1c2 a1c3+a2c2 a2c3+a3c2 a3c3+a4c2 */
i4_rslt_horz_r0_1 = vmlal_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x4_2, i4_coeff_c1);
i4_rslt_horz_r0_1 = vmlal_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x4_3, i4_coeff_c0);
/* i4_rslt_horz_r0_1 : contains res at even pos:0,2,4,6 */
i4_rslt_horz_r1_1 =
vmull_n_s16(i4_samp_horz_16x4_1, i4_coeff_c0); /* a1c0 a2c0 a3c0 a4c0 */
i4_rslt_horz_r1_1 =
vmlal_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x4_2,
i4_coeff_c1); /* a1c0+a2c1 a2c0+a3c1 a3c0+a4c1 a4c0+a5c1 */
i4_rslt_horz_r1_1 = vmlal_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x4_3, i4_coeff_c2);
i4_rslt_horz_r1_1 = vmlal_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x4_4, i4_coeff_c3);
/* i4_rslt_horz_r1_1 : contains res at odd pos:1,3,5,7 */
i4_rslt_horz_r0_2 =
vmull_n_s16(i4_samp_horz_16x4_4, i4_coeff_c3); /* a4c3 a5c3 a6c3 a7c3 */
i4_rslt_horz_r0_2 =
vmlal_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x4_5,
i4_coeff_c2); /* a4c3+a5c2 a5c3+a6c2 a6c3+a7c2 a7c3+a8c2 */
i4_rslt_horz_r0_2 = vmlal_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x4_6, i4_coeff_c1);
i4_rslt_horz_r0_2 = vmlal_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x4_7, i4_coeff_c0);
/* i4_rslt_horz_r0_2 : contains res at even pos:8,10,12,14 */
i4_rslt_horz_r1_2 =
vmull_n_s16(i4_samp_horz_16x4_5, i4_coeff_c0); /* a5c0 a6c0 a7c0 a8c0 */
i4_rslt_horz_r1_2 =
vmlal_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x4_6,
i4_coeff_c1); /* a5c0+a6c1 a6c0+a7c1 a7c0+a8c1 a8c0+a9c1 */
i4_rslt_horz_r1_2 = vmlal_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x4_7, i4_coeff_c2);
i4_rslt_horz_r1_2 = vmlal_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x4_8, i4_coeff_c3);
/* i4_rslt_horz_r1_2 : contains res at odd pos:9,11,13,15 */
/* Interleave even and odd results back into raster order */
i4_rslt_horz_r0_tmp32 = vzipq_s32(i4_rslt_horz_r0_1, i4_rslt_horz_r1_1);
i4_rslt_horz_r1_tmp32 = vzipq_s32(i4_rslt_horz_r0_2, i4_rslt_horz_r1_2);
/* Round: add 512, then shift right by 10 with unsigned saturation */
i4_rslt_horz_r0_1 = vaddq_s32(i4_rslt_horz_r0_tmp32.val[0], const_512_32x4);
i4_rslt_horz_r1_1 = vaddq_s32(i4_rslt_horz_r0_tmp32.val[1], const_512_32x4);
i4_rslt_horz_r0_2 = vaddq_s32(i4_rslt_horz_r1_tmp32.val[0], const_512_32x4);
i4_rslt_horz_r1_2 = vaddq_s32(i4_rslt_horz_r1_tmp32.val[1], const_512_32x4);
i4_rslt_horz_r0_1_tmp = vqshrun_n_s32(i4_rslt_horz_r0_1, 10);
i4_rslt_horz_r1_1_tmp = vqshrun_n_s32(i4_rslt_horz_r1_1, 10);
i4_rslt_horz_r0_2_tmp = vqshrun_n_s32(i4_rslt_horz_r0_2, 10);
i4_rslt_horz_r1_2_tmp = vqshrun_n_s32(i4_rslt_horz_r1_2, 10);
rslt_16x8_t_1 = vcombine_u16(i4_rslt_horz_r0_1_tmp, i4_rslt_horz_r1_1_tmp);
rslt_16x8_t_2 = vcombine_u16(i4_rslt_horz_r0_2_tmp, i4_rslt_horz_r1_2_tmp);
/* Saturating narrow to 8 bits and store the 16 output samples */
vst1_u8(pu1_out, vqmovn_u16(rslt_16x8_t_1));
vst1_u8(pu1_out + 8, vqmovn_u16(rslt_16x8_t_2));
pu1_out += i4_out_stride;
pi2_tmp += i4_filt_stride;
}
}
/**
* @brief
* Horizontal 2-tap interpolation of vertically filtered chroma samples.
* Writes 8 rows of 8 results into every other byte of the destination and
* leaves the remaining bytes of each 16-byte row untouched.
*
* @param[in] pi2_tmp_filt_buf
* Vertically filtered samples; rows are read 6 elements apart.
*
* @param[in,out] pu1_out_buf
* Destination. 16 bytes per row are loaded, blended and stored back.
*
* @param[in] i4_out_stride
* Distance in bytes between consecutive destination rows.
*
* @param[in] i4_phase_0
* Weight of the right-hand sample for even output positions; the left-hand
* sample is weighted by (16 - i4_phase_0).
*
* @param[in] i4_phase_1
* Weight of the right-hand sample for odd output positions; the left-hand
* sample is weighted by (16 - i4_phase_1).
*
* @remarks
* Results are shifted right by 8 with rounding and only the low byte of each
* 16-bit result is kept; no saturation is applied here.
* NOTE(review): the untouched bytes are presumably the other component of an
* interleaved chroma buffer - confirm against the caller.
*/
void isvc_horz_interpol_chroma_dyadic_neon(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf,
WORD32 i4_out_stride, WORD32 i4_phase_0,
WORD32 i4_phase_1)
{
WORD32 i4_y;
WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
UWORD8 *pu1_out = pu1_out_buf;
WORD16 *pi2_tmp = pi2_tmp_filt_buf;
WORD32 i4_filt_stride = 6;
WORD32 i4_dst_stride = i4_out_stride;
int16x8_t i4_samp_horz_16x8_r0_0, i4_samp_horz_16x8_r0_1, i4_samp_horz_16x8_r0_2;
int16x8_t i4_samp_horz_16x8_r1_0, i4_samp_horz_16x8_r1_1, i4_samp_horz_16x8_r1_2;
int16x8_t i4_rslt_horz_r0_1, i4_rslt_horz_r0_2;
int16x8_t i4_rslt_horz_r1_1, i4_rslt_horz_r1_2;
int16x8x2_t temp_horz_16x8_r0;
int16x8x2_t temp_horz_16x8_r1;
int16x8_t final_horz_16x8_r0_1;
int16x8_t final_horz_16x8_r1_1;
uint8x16_t i4_out_horz_8x16_r0, i4_out_horz_8x16_r1;
/* Selects the low byte of every 16-bit lane */
uint8x16_t chroma_mask_8x16 = vreinterpretq_u8_u16(vdupq_n_u16(0x00ff));
/* coeff_0/coeff_1 weight even outputs, coeff_2/coeff_3 weight odd outputs */
i4_coeff_0 = 16 - i4_phase_0;
i4_coeff_1 = i4_phase_0;
i4_coeff_2 = 16 - i4_phase_1;
i4_coeff_3 = i4_phase_1;
/* Horizontal interpolation, two rows (r0 and r1) per iteration */
for(i4_y = 0; i4_y < 8; i4_y += 2)
{
/* NOTE(review): each load fetches 8 lanes although rows are 6 elements */
/* apart and only lanes 0..3 reach the output; for the last row the */
/* loads extend past element 47 of the buffer - confirm it is padded */
i4_samp_horz_16x8_r0_0 = vld1q_s16(pi2_tmp); /* a0 a1 a2 a3 a4 a5 a6 a7 */
i4_samp_horz_16x8_r0_1 = vld1q_s16(pi2_tmp + 1); /* a1 a2 a3 a4 ... */
i4_samp_horz_16x8_r0_2 = vld1q_s16(pi2_tmp + 2); /* a2 a3 a4 a5 ... */
i4_samp_horz_16x8_r1_0 = vld1q_s16(pi2_tmp + i4_filt_stride);
i4_samp_horz_16x8_r1_1 = vld1q_s16(pi2_tmp + i4_filt_stride + 1);
i4_samp_horz_16x8_r1_2 = vld1q_s16(pi2_tmp + (i4_filt_stride + 2));
i4_rslt_horz_r0_1 =
vmulq_n_s16(i4_samp_horz_16x8_r0_0, i4_coeff_0); /* a0c0 a1c0 a2c0 a3c0 */
i4_rslt_horz_r0_2 =
vmulq_n_s16(i4_samp_horz_16x8_r0_1, i4_coeff_2); /* a1c2 a2c2 a3c2 a4c2 */
i4_rslt_horz_r0_1 = vmlaq_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x8_r0_1,
i4_coeff_1); /* a0c0+a1c1 a1c0+a2c1 a2c0+a3c1 a3c0+a4c1 */
i4_rslt_horz_r0_2 = vmlaq_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x8_r0_2,
i4_coeff_3); /* a1c2+a2c3 a2c2+a3c3 a3c2+a4c3 a4c2+a5c3 */
/* Same computation for the second row of the pair */
i4_rslt_horz_r1_1 = vmulq_n_s16(i4_samp_horz_16x8_r1_0, i4_coeff_0);
i4_rslt_horz_r1_2 = vmulq_n_s16(i4_samp_horz_16x8_r1_1, i4_coeff_2);
i4_rslt_horz_r1_1 = vmlaq_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x8_r1_1, i4_coeff_1);
i4_rslt_horz_r1_2 = vmlaq_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x8_r1_2, i4_coeff_3);
/* Interleave even/odd results; val[0] holds the first 8 outputs */
temp_horz_16x8_r0 = vzipq_s16(i4_rslt_horz_r0_1, i4_rslt_horz_r0_2);
temp_horz_16x8_r1 = vzipq_s16(i4_rslt_horz_r1_1, i4_rslt_horz_r1_2);
final_horz_16x8_r0_1 = temp_horz_16x8_r0.val[0];
final_horz_16x8_r1_1 = temp_horz_16x8_r1.val[0];
/* Rounding shift right by 8 */
final_horz_16x8_r0_1 = vrshrq_n_s16(final_horz_16x8_r0_1, 8);
final_horz_16x8_r1_1 = vrshrq_n_s16(final_horz_16x8_r1_1, 8);
/* Read-modify-write: replace only the masked bytes of the destination */
i4_out_horz_8x16_r0 = vld1q_u8(pu1_out);
i4_out_horz_8x16_r1 = vld1q_u8(pu1_out + i4_dst_stride);
i4_out_horz_8x16_r0 = vbslq_u8(chroma_mask_8x16, vreinterpretq_u8_s16(final_horz_16x8_r0_1),
i4_out_horz_8x16_r0);
i4_out_horz_8x16_r1 = vbslq_u8(chroma_mask_8x16, vreinterpretq_u8_s16(final_horz_16x8_r1_1),
i4_out_horz_8x16_r1);
vst1q_u8(pu1_out, i4_out_horz_8x16_r0);
vst1q_u8(pu1_out + i4_dst_stride, i4_out_horz_8x16_r1);
/* Incrementing ptr */
pi2_tmp += (i4_filt_stride << 1);
pu1_out += (i4_dst_stride << 1);
}
}
/**
* @brief
* Vertical 2-tap interpolation of chroma samples. Reads 6 input rows and
* writes 8 rows of unnormalised 16-bit sums to the scratch buffer.
*
* @param[in] pu1_inp_buf
* Source samples; 8 bytes are loaded from each of 6 rows that are
* DYADIC_REF_W_C bytes apart.
*
* @param[out] pi2_tmp_filt_buf
* Scratch buffer; output rows start 6 elements apart and elements 0..47
* are written.
*
* @param[in] i4_phase_0
* Weight of the lower input row for even output rows; the upper row is
* weighted by (16 - i4_phase_0).
*
* @param[in] i4_phase_1
* Weight of the lower input row for odd output rows; the upper row is
* weighted by (16 - i4_phase_1).
*
* @remarks
* Output row 2k combines input rows k and k + 1; output row 2k + 1 combines
* input rows k + 1 and k + 2. Every full-width store writes 8 lanes, so its
* last two lanes land in the next output row and are overwritten by the
* following store; the store order therefore matters.
*/
void isvc_vert_interpol_chroma_dyadic_neon(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
WORD32 i4_phase_0, WORD32 i4_phase_1)
{
WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
WORD32 i4_src_stride = DYADIC_REF_W_C;
UWORD8 *pu1_inp = pu1_inp_buf;
WORD16 *pi2_tmp = pi2_tmp_filt_buf;
uint8x8_t i4_samp_vert_8x8_r0, i4_samp_vert_8x8_r1, i4_samp_vert_8x8_r2, i4_samp_vert_8x8_r3,
i4_samp_vert_8x8_r4, i4_samp_vert_8x8_r5;
int16x8_t i4_rslt_vert_16x8_r0, i4_rslt_vert_16x8_r1, i4_rslt_vert_16x8_r2,
i4_rslt_vert_16x8_r3, i4_rslt_vert_16x8_r4, i4_rslt_vert_16x8_r5, i4_rslt_vert_16x8_r6,
i4_rslt_vert_16x8_r7;
/* coeff_0/coeff_1 weight even output rows, coeff_2/coeff_3 odd output rows */
i4_coeff_0 = 16 - i4_phase_0;
i4_coeff_1 = i4_phase_0;
i4_coeff_2 = 16 - i4_phase_1;
i4_coeff_3 = i4_phase_1;
/* Vertical interpolation: load all 6 input rows up front */
i4_samp_vert_8x8_r0 = vld1_u8(pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_r1 = vld1_u8(pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_r2 = vld1_u8(pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_r3 = vld1_u8(pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_r4 = vld1_u8(pu1_inp);
pu1_inp += i4_src_stride;
i4_samp_vert_8x8_r5 = vld1_u8(pu1_inp);
pu1_inp += i4_src_stride;
/* Output row 0: input rows 0 and 1 */
i4_rslt_vert_16x8_r0 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r0)), i4_coeff_0);
i4_rslt_vert_16x8_r0 = vmlaq_n_s16(
i4_rslt_vert_16x8_r0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r1)), i4_coeff_1);
vst1q_s16(pi2_tmp, i4_rslt_vert_16x8_r0);
/* Output row 1: input rows 1 and 2 */
i4_rslt_vert_16x8_r1 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r1)), i4_coeff_2);
i4_rslt_vert_16x8_r1 = vmlaq_n_s16(
i4_rslt_vert_16x8_r1, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_3);
vst1q_s16(pi2_tmp + 6, i4_rslt_vert_16x8_r1);
/* Output row 2: input rows 1 and 2 */
i4_rslt_vert_16x8_r2 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r1)), i4_coeff_0);
i4_rslt_vert_16x8_r2 = vmlaq_n_s16(
i4_rslt_vert_16x8_r2, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_1);
vst1q_s16(pi2_tmp + 12, i4_rslt_vert_16x8_r2);
/* Output row 3: input rows 2 and 3 */
i4_rslt_vert_16x8_r3 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_2);
i4_rslt_vert_16x8_r3 = vmlaq_n_s16(
i4_rslt_vert_16x8_r3, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_3);
vst1q_s16(pi2_tmp + 18, i4_rslt_vert_16x8_r3);
/* Output row 4: input rows 2 and 3 */
i4_rslt_vert_16x8_r4 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_0);
i4_rslt_vert_16x8_r4 = vmlaq_n_s16(
i4_rslt_vert_16x8_r4, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_1);
vst1q_s16(pi2_tmp + 24, i4_rslt_vert_16x8_r4);
/* Output row 5: input rows 3 and 4 */
i4_rslt_vert_16x8_r5 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_2);
i4_rslt_vert_16x8_r5 = vmlaq_n_s16(
i4_rslt_vert_16x8_r5, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r4)), i4_coeff_3);
vst1q_s16(pi2_tmp + 30, i4_rslt_vert_16x8_r5);
/* Output row 6: input rows 3 and 4 */
i4_rslt_vert_16x8_r6 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_0);
i4_rslt_vert_16x8_r6 = vmlaq_n_s16(
i4_rslt_vert_16x8_r6, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r4)), i4_coeff_1);
vst1q_s16(pi2_tmp + 36, i4_rslt_vert_16x8_r6);
/* Output row 7: input rows 4 and 5 */
i4_rslt_vert_16x8_r7 =
vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r4)), i4_coeff_2);
i4_rslt_vert_16x8_r7 = vmlaq_n_s16(
i4_rslt_vert_16x8_r7, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r5)), i4_coeff_3);
/* Last row is stored as 4 + 1 + 1 lanes so nothing beyond element 47 is written */
vst1_s16(pi2_tmp + 42, vget_low_s16(i4_rslt_vert_16x8_r7));
vst1q_lane_s16(pi2_tmp + 46, i4_rslt_vert_16x8_r7, 4);
vst1q_lane_s16(pi2_tmp + 47, i4_rslt_vert_16x8_r7, 5);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,151 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
* *******************************************************************************
* * @file
* isvc_mem_fns_neon.c
*
* @brief
* neon variants of
* functions used for memory operations
*
* *******************************************************************************
*/
#include <arm_neon.h>
#include <string.h>
#include "ih264_typedefs.h"
#include "isvc_mem_fns.h"
/**
 * @brief
 *  Fills a rectangular region of bytes with a constant value.
 *
 * @param[out] pu1_dst
 *  Top-left byte of the region to fill.
 *
 * @param[in] i4_dst_stride
 *  Distance in bytes between consecutive rows of the destination.
 *
 * @param[in] u1_val
 *  Byte value to write.
 *
 * @param[in] i4_blk_wd
 *  Width of the region in bytes.
 *
 * @param[in] i4_blk_ht
 *  Height of the region in rows. Not consulted when i4_blk_wd is 4 or 8:
 *  those widths always fill 4 and 8 rows respectively.
 *
 * @remarks
 *  Regions whose width and height are both multiples of 16 are filled in
 *  16x16 tiles with 128-bit stores; every other shape falls back to one
 *  memset() per row.
 */
void isvc_memset_2d_neon(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
                         WORD32 i4_blk_ht)
{
    WORD32 i4_row;

    if(4 == i4_blk_wd)
    {
        /* Four rows, one 32-bit lane store per row */
        const uint32x2_t u4_fill = vreinterpret_u32_u8(vdup_n_u8(u1_val));

        for(i4_row = 0; i4_row < 4; i4_row++)
        {
            vst1_lane_u32((UWORD32 *) (pu1_dst + i4_row * i4_dst_stride), u4_fill, 0);
        }

        return;
    }

    if(8 == i4_blk_wd)
    {
        /* Eight rows, one 64-bit store per row */
        const uint8x8_t u1_fill = vdup_n_u8(u1_val);

        for(i4_row = 0; i4_row < 8; i4_row++)
        {
            vst1_u8(pu1_dst + i4_row * i4_dst_stride, u1_fill);
        }

        return;
    }

    if((0 == (i4_blk_wd % 16)) && (0 == (i4_blk_ht % 16)))
    {
        /* Walk the region tile by tile; each tile is 16 rows of 16 bytes */
        const uint8x16_t u1_fill_q = vdupq_n_u8(u1_val);
        const WORD32 i4_num_tiles_x = i4_blk_wd / 16;
        const WORD32 i4_num_tiles_y = i4_blk_ht / 16;
        WORD32 i4_tile_x, i4_tile_y;

        for(i4_tile_y = 0; i4_tile_y < i4_num_tiles_y; i4_tile_y++)
        {
            UWORD8 *pu1_tile_row = pu1_dst + i4_tile_y * 16 * i4_dst_stride;

            for(i4_tile_x = 0; i4_tile_x < i4_num_tiles_x; i4_tile_x++)
            {
                UWORD8 *pu1_tile = pu1_tile_row + (i4_tile_x << 4);

                for(i4_row = 0; i4_row < 16; i4_row++)
                {
                    vst1q_u8(pu1_tile + i4_row * i4_dst_stride, u1_fill_q);
                }
            }
        }

        return;
    }

    /* Generic shape: plain memset, one row at a time */
    for(i4_row = 0; i4_row < i4_blk_ht; i4_row++)
    {
        memset(pu1_dst, u1_val, i4_blk_wd);
        pu1_dst += i4_dst_stride;
    }
}

File diff suppressed because it is too large Load diff

View file

@ -141,11 +141,16 @@ typedef enum
LAST_SIGNIFICANT_COEFF_FLAG_8X8_FRAME = 417,
COEFF_ABS_LEVEL_MINUS1_8X8 = 426,
SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 436,
LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 451
LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 451,
/* SVC related CABAC offsets */
BASE_MODE_FLAG = 460,
MOTION_PREDICTION_FLAG_L0 = 463,
MOTION_PREDICTION_FLAG_L1 = 464,
RESIDUAL_PREDICTION_FLAG = 465,
} cabac_table_num_t;
/**
******************************************************************************
* @enum ctxIdxOffset

View file

@ -135,6 +135,9 @@ enum
ISLICE = 2,
SPSLICE = 3,
SISLICE = 4,
EPSLICE = 5,
EBSLICE = 6,
EISLICE = 7,
MAXSLICE_TYPE,
};
@ -144,27 +147,28 @@ enum
* @brief Defines the set of possible nal unit types
******************************************************************************
*/
enum
typedef enum NAL_UNIT_TYPE_T
{
NAL_UNSPEC_0 = 0,
NAL_SLICE_NON_IDR = 1,
NAL_SLICE_DPA = 2,
NAL_SLICE_DPB = 3,
NAL_SLICE_DPC = 4,
NAL_SLICE_IDR = 5,
NAL_SEI = 6,
NAL_SPS = 7,
NAL_PPS = 8,
NAL_AUD = 9,
NAL_EOSEQ = 10,
NAL_EOSTR = 11,
NAL_FILLER = 12,
NAL_SPSE = 13,
NAL_RES_18 = 14,
NAL_AUX_PIC = 19,
NAL_RES_23 = 20,
NAL_UNSPEC_31 = 24,
};
NAL_UNSPEC_0 = 0,
NAL_SLICE_NON_IDR = 1,
NAL_SLICE_DPA = 2,
NAL_SLICE_DPB = 3,
NAL_SLICE_DPC = 4,
NAL_SLICE_IDR = 5,
NAL_SEI = 6,
NAL_SPS = 7,
NAL_PPS = 8,
NAL_AUD = 9,
NAL_EOSEQ = 10,
NAL_EOSTR = 11,
NAL_FILLER = 12,
NAL_SPSE = 13,
NAL_PREFIX = 14,
NAL_SUBSET_SPS = 15,
NAL_AUX_PIC = 19,
NAL_CODED_SLICE_EXTENSION = 20,
NAL_UNSPEC_31 = 24,
} NAL_UNIT_TYPE_T;
/**
******************************************************************************
@ -261,27 +265,29 @@ typedef enum
*/
typedef enum
{
I16x16 = 0,
I4x4 = 1,
I8x8 = 2,
P16x16 = 3,
P16x8 = 4,
P8x16 = 5,
P8x8 = 6,
PSKIP = 7,
IPCM = 8,
B16x16 = 9,
BSKIP = 10,
BDIRECT = 11,
INVALID_MB_TYPE = -1,
I16x16 = 0,
I4x4 = 1,
I8x8 = 2,
P16x16 = 3,
P16x8 = 4,
P8x16 = 5,
P8x8 = 6,
PSKIP = 7,
IPCM = 8,
B16x16 = 9,
BSKIP = 10,
BDIRECT = 11,
BASE_MODE = 12,
MAX_MBTYPES,
}MBTYPES_T;
} MBTYPES_T;
/* Pred Modes */
enum
{
BLOCK_TYPE_INTER_MB = 0,
BLOCK_TYPE_INTRA_MB = 1,
BLOCK_TYPE_SKIP_MB = 2
BLOCK_TYPE_SKIP_MB = 2
};
/* Prediction list */
@ -521,9 +527,16 @@ typedef enum
/* Number of max TU in a MB row */
#define MAX_TU_IN_MB_ROW ((MB_SIZE / MIN_TU_SIZE))
#define MIN_TU_IN_MB_ROW ((MB_SIZE / MAX_TU_SIZE))
/* Number of max PU in a CTb row */
#define MAX_PU_IN_MB_ROW ((MB_SIZE / MIN_PU_SIZE))
#define MAX_TU_IN_MB_COL MAX_TU_IN_MB_ROW
#define MIN_TU_IN_MB_COL MIN_TU_IN_MB_ROW
#define MAX_PU_IN_MB_COL MAX_PU_IN_MB_ROW
/* Number of max PU in a MB */
/*****************************************************************************/
@ -537,7 +550,11 @@ typedef enum
#define MAX_TU_IN_MB ((MB_SIZE / MIN_TU_SIZE) * \
(MB_SIZE / MIN_TU_SIZE))
#define MIN_TU_IN_MB (MIN_TU_IN_MB_ROW * MIN_TU_IN_MB_COL)
#define NUM_4x4_IN_8x8 4
#define NUM_COEFFS_IN_MIN_TU (MIN_TU_SIZE * MIN_TU_SIZE)
/**
* Maximum transform depths

View file

@ -44,6 +44,8 @@
/*Width of a 4x4 block*/
#define SUB_BLK_WIDTH_4x4 4
#define SUB_BLK_HEIGHT_4x4 4
/*Width of an 8x8 block*/
#define SUB_BLK_WIDTH_8x8 8

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,57 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file isvc_cabac_tables.h
*
* @brief
* This file contains enumerations, macros and extern declarations of H264
* cabac tables
*
* @author
* Ittiam
*
* @remarks
* none
******************************************************************************
*/
#ifndef _ISVC_CABAC_TABLES_H_
#define _ISVC_CABAC_TABLES_H_
#include "ih264_cabac_tables.h"
/**
******************************************************************************
* @brief number of cabac contexts used for SVC (valid context indices: 0-466)
******************************************************************************
*/
#define NUM_SVC_CABAC_CTXTS 467
extern const UWORD32 (*gau4_isvc_cabac_table)[4];
/*****************************************************************************/
/* Cabac tables for context initialization depending upon type of Slice, */
/* cabac init Idc value and Qp. */
/*****************************************************************************/
extern const UWORD8 gau1_isvc_cabac_ctxt_init_table[NUM_CAB_INIT_IDC_PLUS_ONE][QP_RANGE]
[NUM_SVC_CABAC_CTXTS];
#endif

View file

@ -0,0 +1,81 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_common_tables.c
*
* @brief
* Contains common global tables
*
* @author
* Harish M
*
* @par List of Functions:
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* User include files */
#include "ih264_typedefs.h"
#include "isvc_defs.h"
#include "isvc_macros.h"
#include "isvc_structs.h"
#include "ih264_common_tables.h"
#include "isvc_common_tables.h"
/*****************************************************************************/
/* Extern global definitions */
/*****************************************************************************/
/**
******************************************************************************
* @brief Level limits table. While encoding, the level of the bitstream is
* derived from the input configuration parameters using the table below.
* input : table_idx
* output : level_idc or cpb size
* @remarks Table A-1 level table limits. One row per level, from
* IH264_LEVEL_10 to IH264_LEVEL_51 (16 rows in total).
* NOTE(review): the numeric columns appear to follow the column order of
* Table A-1 (MaxMBPS, MaxFS, MaxDpbMbs, MaxBR, MaxCPB, MaxVmvR) - confirm
* against the field order of level_tables_t, which is declared elsewhere.
******************************************************************************
*/
const level_tables_t gas_isvc_lvl_tbl[16] = {
{IH264_LEVEL_10, 1485, 99, 396, 64, 175, 64},
{IH264_LEVEL_1B, 1485, 99, 396, 128, 350, 64},
{IH264_LEVEL_11, 3000, 396, 900, 192, 500, 128},
{IH264_LEVEL_12, 6000, 396, 2376, 384, 1000, 128},
{IH264_LEVEL_13, 11880, 396, 2376, 768, 2000, 128},
{IH264_LEVEL_20, 11880, 396, 2376, 2000, 2000, 128},
{IH264_LEVEL_21, 19800, 792, 4752, 4000, 4000, 256},
{IH264_LEVEL_22, 20250, 1620, 8100, 4000, 4000, 256},
{IH264_LEVEL_30, 40500, 1620, 8100, 10000, 10000, 256},
{IH264_LEVEL_31, 108000, 3600, 18000, 14000, 14000, 512},
{IH264_LEVEL_32, 216000, 5120, 20480, 20000, 20000, 512},
{IH264_LEVEL_40, 245760, 8192, 32768, 20000, 25000, 512},
{IH264_LEVEL_41, 245760, 8192, 32768, 50000, 62500, 512},
{IH264_LEVEL_42, 522240, 8704, 34816, 50000, 62500, 512},
{IH264_LEVEL_50, 589824, 22080, 110400, 135000, 135000, 512},
{IH264_LEVEL_51, 983040, 36864, 184320, 240000, 240000, 512},
};

View file

@ -0,0 +1,50 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_common_tables.h
*
* @brief
* Common tables
*
* @author
* Harish
*
* @par List of Functions:
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVC_COMMON_TABLES_H_
#define _ISVC_COMMON_TABLES_H_
/* Dependencies of ih264_common_tables.h */
#include "ih264_defs.h"
#include "ih264_structs.h"
#include "ih264_common_tables.h"
extern const level_tables_t gas_isvc_lvl_tbl[16];
#endif

88
common/svc/isvc_defs.h Normal file
View file

@ -0,0 +1,88 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_defs.h
*
* @brief
* Contains macro definitions, and other typedefs used for SVC encoding
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVC_DEFS_H_
#define _ISVC_DEFS_H_
#define MAX_NUM_TEMPORAL_LAYERS 3
#define MAX_NUM_SPATIAL_LAYERS 3
#define MAX_VUI_EXT_NUM_ENTRIES (MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS)
/* Single-bit MB mode flags; each macro occupies its own bit position (0..4) */
#define SVC_INTER_MB (1 << 0) /*!< P or B MBs decoded or inferred */
#define SVC_INTRA_MB (1 << 1) /*!< Intra MBs other than IPCM and I_BL */
#define SVC_IPCM_MB (1 << 2) /*!< IPCM_MB decoded or inferred */
#define SVC_IBL_MB (1 << 3) /*!< I_BL MB always inferred */
#define SVC_INTRA_INTER_MB \
(1 << 4) /*!< Intra Inter MB will have an alternate prediction \
process*/
#define MB_WIDTH_SHIFT 4
#define MB_HEIGHT_SHIFT 4
#define UV 1
#define NUM_SP_COMPONENTS 2
#define NUM_COMPONENTS 3
#define SVC_EXTRACT_MB_MODE(x) ((x) &0x1F)
#define GET_BIT_TX_SIZE(x, y) ((x) & (1 << (7 - (y))))
/* profile_idc values of the scalable (SVC) profiles of H.264 */
typedef enum SVC_PROFILES_T
{
/* 'Scalable Baseline' profile */
IH264_SCALABLE_BASELINE = 83,
/* 'Scalable High' profile */
IH264_SCALABLE_HIGH_PROFILE = 86
} SVC_PROFILES_T;
/* Prediction mode identifiers; presumably L0 / L1 name the reference lists */
/* and BI uses both - confirm against the users of this enum */
typedef enum PRED_MODE_T
{
L0 = 0,
L1 = 1,
BI = 2,
/* NOTE(review): deliberately shares the value 2 with BI; presumably the */
/* number of single-list directions (L0 and L1) - confirm */
NUM_PRED_DIRS = 2,
INVALID_PRED_MODE = 4,
} PRED_MODE_T;
#endif

View file

@ -0,0 +1,219 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_inter_pred_filters.h
*
* @brief
* Declarations of functions used for inter prediction
*
* @author
* Ittiam
*
* @par List of Functions:
* -ih264_inter_pred_luma_copy
* -ih264_interleave_copy
* -ih264_inter_pred_luma_horz
* -ih264_inter_pred_luma_vert
* -ih264_inter_pred_luma_horz_hpel_vert_hpel
* -ih264_inter_pred_luma_vert_qpel
* -ih264_inter_pred_luma_horz_qpel
* -ih264_inter_pred_luma_horz_qpel_vert_qpel
* -ih264_inter_pred_luma_horz_qpel_vert_hpel
* -ih264_inter_pred_luma_horz_hpel_vert_qpel
* -ih264_inter_pred_luma_bilinear
* -ih264_inter_pred_chroma
* -ih264_inter_pred_luma_copy_a9q
* -ih264_interleave_copy_a9
* -ih264_inter_pred_luma_horz_a9q
* -ih264_inter_pred_luma_vert_a9q
* -ih264_inter_pred_luma_bilinear_a9q
* -ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q
* -ih264_inter_pred_luma_horz_qpel_a9q
* -ih264_inter_pred_luma_vert_qpel_a9q
* -ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q
* -ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q
* -ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q
* -ih264_inter_pred_chroma_a9q
* -ih264_inter_pred_luma_copy_av8
* -ih264_interleave_copy_av8
* -ih264_inter_pred_luma_horz_av8
* -ih264_inter_pred_luma_vert_av8
* -ih264_inter_pred_luma_bilinear_av8
* -ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
* -ih264_inter_pred_luma_horz_qpel_av8
* -ih264_inter_pred_luma_vert_qpel_av8
* -ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
* -ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
* -ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
* -ih264_inter_pred_chroma_av8
* -ih264_inter_pred_chroma_dx_zero_av8
* -ih264_inter_pred_chroma_dy_zero_av8
* -ih264_inter_pred_luma_copy_ssse3
* -ih264_inter_pred_luma_copy_ssse3
* -ih264_inter_pred_luma_horz_ssse3
* -ih264_inter_pred_luma_vert_ssse3
* -ih264_inter_pred_luma_bilinear_ssse3
* -ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3
* -ih264_inter_pred_luma_horz_qpel_ssse3
* -ih264_inter_pred_luma_vert_qpel_ssse3
* -ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3
* -ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3
* -ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3
* -ih264_inter_pred_chroma_ssse3
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVC_INTER_PRED_FILTERS_H_
#define _ISVC_INTER_PRED_FILTERS_H_
/*****************************************************************************/
/* Constant Data variables */
/*****************************************************************************/
extern const WORD32 ih264_g_six_tap[3]; /* coefficients for 6 tap filtering*/
/*****************************************************************************/
/* Extern Function Declarations */
/*****************************************************************************/
/* Function type shared by the luma inter prediction functions declared below */
/* (copy, horz, vert and the hpel / qpel variants) */
typedef void FT_INTER_PRED_LUMA(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd,
WORD32 ht, WORD32 wd, UWORD8 *pu1_tmp, WORD32 dydx);
/* Function type of the interleave copy functions declared below */
typedef void FT_INTERLEAVE_COPY(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd,
WORD32 ht, WORD32 wd);
/* Function type of the bilinear luma functions; takes two sources, each with */
/* its own stride, and one destination */
typedef void FT_INTER_PRED_LUMA_BILINEAR(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst,
WORD32 src_strd1, WORD32 src_strd2, WORD32 dst_strd,
WORD32 height, WORD32 width);
/* Function type of the chroma inter prediction functions declared below */
typedef void FT_INTER_PRED_CHROMA(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd,
WORD32 dst_strd, WORD32 dx, WORD32 dy, WORD32 ht, WORD32 wd);
/* No NEON Declarations */
FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy;
FT_INTERLEAVE_COPY ih264_interleave_copy;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel;
FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear;
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma;
/* A9 NEON Declarations */
FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_a9q;
FT_INTERLEAVE_COPY ih264_interleave_copy_a9;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_a9q;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_a9q;
FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear_a9q;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_a9q;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_a9q;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q;
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_a9q;
/* AV8 NEON Declarations */
FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_av8;
FT_INTERLEAVE_COPY ih264_interleave_copy_av8;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_av8;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_av8;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_av8;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_av8;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_av8;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_av8;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_av8;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_av8;
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_av8;
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_dx_zero_av8;
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_dy_zero_av8;
/* SSSE3 Intrinsic Declarations */
FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_ssse3;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_ssse3;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_ssse3;
FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear_ssse3;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_ssse3;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_ssse3;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3;
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3;
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_ssse3;
/** Nothing past this point */
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,251 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef _ISVC_INTRA_RESAMPLE_H_
#define _ISVC_INTRA_RESAMPLE_H_
#include "ih264_typedefs.h"
#include "isvc_macros.h"
#include "ih264_platform_macros.h"
#include "isvc_structs.h"
#define DYADIC_REF_W_Y 20
#define DYADIC_REF_H_Y 20
#define DYADIC_REF_W_C 10
#define DYADIC_REF_H_C 10
#define MAX_NUM_RES_LYRS 4
#define MAX_PIX_FILL_LUMA 4
#define MAX_PIX_FILL_CHROMA 2
#define MAX_REF_ARR_WD_HT 48
#define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_SIZE)
#define CLIPUCHAR(x) CLIP3(0, 255, (x))
#define REF_ARRAY_WIDTH 48
#define REF_ARRAY_HEIGHT 48
/* Function type of the dyadic luma interpolation kernels; C, SSE42 and NEON */
/* variants are declared at the end of this header */
typedef void FT_INTERPOLATE_LUMA_2X(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
UWORD8 *pu1_out_buf, WORD32 i4_out_stride);
/* Function type of the vertical chroma interpolation kernels; reads an 8 bit */
/* input buffer and takes a 16 bit temporary buffer plus two phase values */
typedef void FT_VERT_INTERPOLATE_CHROMA_2X(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
WORD32 i4_phase_0, WORD32 i4_phase_1);
/* Function type of the horizontal chroma interpolation kernels; takes the */
/* 16 bit temporary buffer and an 8 bit output buffer with its stride */
typedef void FT_HORZ_INTERPOLATE_CHROMA_2X(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf,
WORD32 i4_out_stride, WORD32 i4_phase_0,
WORD32 i4_phase_1);
/* Describes a buffer by its base pointer, element size and stride */
typedef struct mem_element_t
{
/* Buffer pointer */
void *pv_buffer;
/* size of the structure or unit */
WORD32 i4_element_size;
/* Stride of buffer in terms of number of elements.*/
WORD32 i4_num_element_stride;
} mem_element_t;
/* Description of a single segment; stored per segment in seg_lookup_desc_t */
typedef struct seg_description_t
{
/* describes segment dimension */
UWORD8 u1_seg_dim;
/* describes offset from start */
UWORD8 u1_seg_off;
/* describes whether mb is adjoining the segment
0 => not adjoining 1 => adjoining */
UWORD8 u1_mb_adjoin;
/* distance to nearest MB */
WORD8 i1_dist_idx;
/* describes the nearest mb boundary
+1 => rightMB/bottomMB
-1 => leftMB/topMB */
WORD8 i1_nearst_mb_bdry;
} seg_description_t;
/* Lookup entry holding the segment descriptions (at most 4) for one position */
typedef struct seg_lookup_desc_t
{
/* place holder to store the number of segments */
UWORD8 u1_num_segments;
/* this variable indicates where the start location of the segment lies
relative to the block width (less than or greater than the block width).
NOTE(review): the field is a UWORD8 despite its 'u4_' prefix */
UWORD8 u4_start_pos;
/* place holder to store per segment description */
seg_description_t s_segments[4];
} seg_lookup_desc_t;
/* Intra resampling context of one resolution layer; intra_sampling_ctxt_t */
/* holds an array of these, one per layer */
typedef struct intra_samp_lyr_ctxt
{
/* mb position */
coordinates_t *ps_mb_pos;
/* reference layer width in terms luma samples */
WORD32 i4_ref_width;
/* reference layer height in terms luma samples */
WORD32 i4_ref_height;
/* Constrained intra resampling flag. Range is [0,1]. */
WORD8 i1_constrained_intra_rsmpl_flag;
/* Chroma xPhase for even values of x for dyadic cases */
WORD32 i4_x_phase_0;
/* Chroma xPhase for odd values of x for dyadic cases */
WORD32 i4_x_phase_1;
/* Chroma yPhase for even values of y for dyadic cases */
WORD32 i4_y_phase_0;
/* Chroma yPhase for odd values of y for dyadic cases */
WORD32 i4_y_phase_1;
/* Interpolation kernels (luma, vertical chroma, horizontal chroma) */
FT_INTERPOLATE_LUMA_2X *pf_interpolate_luma;
FT_VERT_INTERPOLATE_CHROMA_2X *pf_vert_interpol_chroma;
FT_HORZ_INTERPOLATE_CHROMA_2X *pf_horz_interpol_chroma;
/* NOTE(review): presumably the min / max x and y positions covered by the */
/* reference array - confirm against the code that fills them */
WORD16 i2_x_min_pos;
WORD16 i2_x_max_pos;
WORD16 i2_y_min_pos;
WORD16 i2_y_max_pos;
/* NOTE(review): the pointers below are not described in this header; */
/* their layout and ownership must be checked at the allocation site */
coordinates_t *ps_phase;
WORD32 *pi4_ref_array_positions_x;
WORD32 *pi4_ref_array_positions_y;
coordinates_t *ps_offsets;
coordinates_t *ps_ref_array_dims;
/* buffers to store lookup for horizontal segment description */
seg_lookup_desc_t as_seg_lookup_horz[MB_SIZE];
/* buffers to store lookup for vertical segment description */
seg_lookup_desc_t as_seg_lookup_vert[MB_SIZE];
/* buffers to store lookup for x indexes to get
availability from 4x4 availability grid */
UWORD8 au1_refarray_x_idx[MAX_REF_IDX_ARRAY];
/* buffers to store lookup for y indexes to get
availability from 4x4 availability grid */
UWORD8 au1_refarray_y_idx[MAX_REF_IDX_ARRAY];
} intra_samp_lyr_ctxt;
/* Top level intra resampling context: per-layer contexts plus the buffers */
/* used while resampling */
typedef struct intra_sampling_ctxt_t
{
/* Array of resolution layer ctxt. */
intra_samp_lyr_ctxt as_res_lyrs[MAX_NUM_RES_LYRS];
/* pointer to array of SPS */
void *ps_sps;
/* buffer to store the reference layer data before intra sampling */
UWORD8 *pu1_refarray_buffer;
/* buffer to hold the reference layer Cb data before intra
resampling (used for dyadic cases only) */
UWORD8 *pu1_refarray_cb;
/* buffer to hold the reference layer Cr data before intra
resampling (used for dyadic cases only) */
UWORD8 *pu1_refarray_cr;
/* intermediate buffer for interpolation */
WORD32 *pi4_temp_interpolation_buffer;
/* resolution id of the layer which is to be processed */
WORD32 i4_res_lyr_id;
/* reference layer width in terms luma samples */
WORD32 i4_ref_width;
/* stride of the reference array buffer
NOTE(review): the earlier comment here repeated the i4_ref_width
description; confirm the unit of this stride at its point of use */
WORD32 i4_refarray_stride;
/* reference layer height in terms luma samples */
WORD32 i4_ref_height;
} intra_sampling_ctxt_t;
/* Per-MB parameters: packed chroma / luma NNZ bits and the MB mode */
typedef struct inter_lyr_mb_prms_t
{
/* NNZs of Chroma. Here each bit corresponds
to a NNZs of 4x4 sub block. Lower 4 bits are
used for Cb and upper are used for Cr */
UWORD8 u1_chroma_nnz;
/* NNZs of Luma. Here each bit corresponds
to a NNZs of 4x4 sub block in raster scan order. */
UWORD16 u2_luma_nnz;
/* Packed MB mode transform size of an MB */
WORD8 i1_mb_mode;
} inter_lyr_mb_prms_t;
/* Function declarations */
extern void isvc_intra_samp_mb_dyadic(void *pv_intra_samp_ctxt, mem_element_t *ps_ref_luma,
mem_element_t *ps_ref_chroma,
mem_element_t *ps_ref_mb_mode_map,
mem_element_t *ps_curr_luma, mem_element_t *ps_curr_chroma,
UWORD16 u2_mb_x, UWORD16 u2_mb_y,
WORD32 i4_scaled_ref_layer_left_offset,
WORD32 i4_scaled_ref_layer_top_offset);
extern void isvc_intra_samp_mb(void *pv_intra_samp_ctxt_luma, void *pv_intra_samp_ctxt_chroma,
mem_element_t *ps_ref_luma, mem_element_t *ps_ref_chroma,
mem_element_t *ps_ref_mb_mode_map, mem_element_t *ps_curr_luma,
mem_element_t *ps_curr_chroma);
extern void isvc_intra_resamp_generate_segment_lookup(seg_lookup_desc_t *ps_seg_lookup_table,
WORD32 i4_dimension, WORD32 i4_mb_size,
WORD32 i4_shift_val);
/* C Declarations */
extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic;
extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic;
extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic;
/* SSE42 Declarations */
extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic_sse42;
extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic_sse42;
extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic_sse42;
/* NEON Declarations */
extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic_neon;
extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic_neon;
extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic_neon;
#endif

File diff suppressed because it is too large Load diff

37
common/svc/isvc_macros.h Normal file
View file

@ -0,0 +1,37 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_macros.h
*
* @brief
* Contains macro definitions used in SVC
*
*******************************************************************************
*/
#ifndef _ISVC_MACROS_H_
#define _ISVC_MACROS_H_
/* Function specifier combining 'inline' with the always_inline attribute */
/* (GCC / Clang __attribute__ syntax) */
#define FORCEINLINE __attribute__((always_inline)) inline
#endif

317
common/svc/isvc_mem_fns.c Normal file
View file

@ -0,0 +1,317 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_mem_fns.c
*
* @brief
* Functions used for memory operations
*
* @author
* Ittiam
*
* @par List of Functions:
* isvc_memcpy()
* isvc_memcpy_mul_8()
* isvc_memset()
* isvc_memset_mul_8()
* isvc_memset_16bit()
* isvc_memset_16bit_mul_8()
* isvc_memory_alloc()
* isvc_memory_free()
*
* @remarks
* None
*
******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System include files */
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* User include files */
#include "ih264_typedefs.h"
#include "isvc_mem_fns.h"
/**
 ********************************************************************************
 * @brief Copies a 2D block, one row at a time, from source to destination
 *
 * @param[out] pu1_dst : pointer to the first row of the destination block
 *
 * @param[in] i4_dst_stride : distance between consecutive destination rows
 *
 * @param[in] pu1_src : pointer to the first row of the source block
 *
 * @param[in] i4_src_stride : distance between consecutive source rows
 *
 * @param[in] i4_blk_wd : number of elements copied per row
 *
 * @param[in] i4_blk_ht : number of rows copied
 *
 * @return void
 *
 * @remarks Each row is copied with memmove(), so a source row and a
 * destination row are allowed to overlap
 ********************************************************************************
 */
void isvc_copy_2d(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src, WORD32 i4_src_stride,
                  WORD32 i4_blk_wd, WORD32 i4_blk_ht)
{
    UWORD8 *pu1_dst_row = pu1_dst;
    UWORD8 *pu1_src_row = pu1_src;
    WORD32 i4_rows_left = i4_blk_ht;

    while(i4_rows_left > 0)
    {
        memmove(pu1_dst_row, pu1_src_row, i4_blk_wd * sizeof(pu1_dst_row[0]));

        pu1_src_row += i4_src_stride;
        pu1_dst_row += i4_dst_stride;
        i4_rows_left--;
    }
}
/**
 ********************************************************************************
 * @brief Fills a 2D block with a single byte value, one row at a time
 *
 * @param[out] pu1_dst : pointer to the first row of the block
 *
 * @param[in] i4_dst_stride : distance between consecutive rows
 *
 * @param[in] u1_val : value written to every element of the block
 *
 * @param[in] i4_blk_wd : number of bytes set per row
 *
 * @param[in] i4_blk_ht : number of rows set
 *
 * @return void
 ********************************************************************************
 */
void isvc_memset_2d(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
                    WORD32 i4_blk_ht)
{
    UWORD8 *pu1_row = pu1_dst;
    WORD32 i4_rows_left;

    for(i4_rows_left = i4_blk_ht; i4_rows_left > 0; i4_rows_left--)
    {
        memset(pu1_row, u1_val, i4_blk_wd);
        pu1_row += i4_dst_stride;
    }
}
/**
 *******************************************************************************
 *
 * @brief
 *  Copies one component of an interleaved 8 bit block
 *
 * @par Description:
 *  For each of the 'ht' rows, 'wd' elements located at even offsets
 *  (0, 2, 4, ...) of the source row are copied to the same offsets of the
 *  destination row
 *
 * @param[in] pu1_src
 *  UWORD8 pointer to the source
 *
 * @param[out] pu1_dst
 *  UWORD8 pointer to the destination
 *
 * @param[in] src_strd
 *  integer source stride
 *
 * @param[in] dst_strd
 *  integer destination stride
 *
 * @param[in] ht
 *  integer height of the array
 *
 * @param[in] wd
 *  integer width of the array, counted in elements of one component
 *
 * @returns
 *
 * @remarks
 *  Only alternate elements are copied; the elements at odd offsets of the
 *  destination are not touched
 *
 *******************************************************************************
 */
void isvc_interleaved_copy(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd,
                           WORD32 ht, WORD32 wd)
{
    UWORD8 *pu1_src_row = pu1_src;
    UWORD8 *pu1_dst_row = pu1_dst;
    WORD32 i4_rows_left = ht;

    while(i4_rows_left > 0)
    {
        WORD32 i4_elem;

        /* element i4_elem of the component lives at offset 2 * i4_elem */
        for(i4_elem = 0; i4_elem < wd; i4_elem++)
        {
            pu1_dst_row[2 * i4_elem] = pu1_src_row[2 * i4_elem];
        }

        pu1_src_row += src_strd;
        pu1_dst_row += dst_strd;
        i4_rows_left--;
    }
}
/**
*******************************************************************************
*
* @brief
* Copies one component of an interleaved 16-bit 2D array
*
* @par Description:
* For each of the 'ht' rows, the WORD16 elements at even indices
* 0, 2, ..., 2 * (wd - 1) are copied from 'pi2_src' to the same indices in
* 'pi2_dst'. Elements at odd indices of the destination are left untouched.
*
* @param[in] pi2_src
* WORD16 pointer to the source
*
* @param[out] pi2_dst
* WORD16 pointer to the destination
*
* @param[in] src_strd
* integer source stride, in WORD16 elements
*
* @param[in] dst_strd
* integer destination stride, in WORD16 elements
*
* @param[in] ht
* integer height of the array (number of rows)
*
* @param[in] wd
* integer width of the array (number of elements copied per row)
*
* @returns
* None
*
* @remarks
* Strides are applied through WORD16 pointer arithmetic, hence they count
* elements and not bytes.
*
*******************************************************************************
*/
void isvc_16bit_interleaved_copy(WORD16 *pi2_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 dst_strd,
                                 WORD32 ht, WORD32 wd)
{
    WORD16 *pi2_src_row = pi2_src;
    WORD16 *pi2_dst_row = pi2_dst;
    WORD32 i4_row;

    for(i4_row = 0; i4_row < ht; i4_row++)
    {
        WORD32 i4_elem;

        /* Only every second element belongs to the component being copied */
        for(i4_elem = 0; i4_elem < wd; i4_elem++)
        {
            pi2_dst_row[2 * i4_elem] = pi2_src_row[2 * i4_elem];
        }

        pi2_src_row += src_strd;
        pi2_dst_row += dst_strd;
    }
}
/**
*******************************************************************************
*
* @brief
* Sets one component of an interleaved 16-bit 2D array to a constant
*
* @par Description:
* For each of the 'i4_ht' rows, the WORD16 elements at even indices
* 0, 2, ..., 2 * (i4_wd - 1) of the buffer are overwritten with 'i2_value'.
* Elements at odd indices are left untouched.
*
* @param[in,out] pi2_src
* WORD16 pointer to the buffer that is written (despite its name, this is the
* destination of the operation)
*
* @param[in] i4_src_strd
* integer stride of the buffer, in WORD16 elements
*
* @param[in] i2_value
* Value to set
*
* @param[in] i4_wd
* integer width of the array (number of elements set per row)
*
* @param[in] i4_ht
* integer height of the array (number of rows)
*
* @returns
* None
*
* @remarks
* Width precedes height in this signature, whereas
* isvc_16bit_interleaved_copy takes height before width.
*
*******************************************************************************
*/
void isvc_16bit_interleaved_memset(WORD16 *pi2_src, WORD32 i4_src_strd, WORD16 i2_value,
                                   WORD32 i4_wd, WORD32 i4_ht)
{
    WORD16 *pi2_row = pi2_src;
    WORD32 i4_row, i4_elem;

    for(i4_row = 0; i4_row < i4_ht; i4_row++)
    {
        for(i4_elem = 0; i4_elem < i4_wd; i4_elem++)
        {
            pi2_row[2 * i4_elem] = i2_value;
        }

        pi2_row += i4_src_strd;
    }
}
/**
*******************************************************************************
*
* @brief
* Checks if any pixel in a block is non-zero
*
* @param[in] pu1_data
* UWORD8 pointer to the block to be checked
*
* @param[in] i4_data_strd
* Stride of data buffer, in bytes. It is applied as a signed pointer step, so
* a negative stride walks the rows backwards in memory.
*
* @param[in] u4_wd
* Width of the block
*
* @param[in] u4_ht
* Height of the block
*
* @returns
* 1 as soon as a non-zero byte is found, 0 if every byte of the block is zero
* (including the case of an empty block)
*
*******************************************************************************
*/
UWORD8 isvc_is_nonzero_blk(UWORD8 *pu1_data, WORD32 i4_data_strd, UWORD32 u4_wd, UWORD32 u4_ht)
{
    const UWORD8 *pu1_row = pu1_data;
    UWORD32 i, j;

    for(i = 0; i < u4_ht; i++)
    {
        for(j = 0; j < u4_wd; j++)
        {
            if(pu1_row[j])
            {
                return 1;
            }
        }

        /* The row pointer is advanced by the signed stride instead of indexing with */
        /* 'j + i * i4_data_strd': that expression is evaluated in unsigned 32-bit */
        /* arithmetic, which gives a wrong (huge) index for negative strides */
        pu1_row += i4_data_strd;
    }

    return 0;
}

109
common/svc/isvc_mem_fns.h Normal file
View file

@ -0,0 +1,109 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_mem_fns.h
*
* @brief
* Function declarations used for memory functions
*
* @author
* Ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVC_MEM_FNS_H_
#define _ISVC_MEM_FNS_H_
#include "ih264_typedefs.h"
/*
* Function types shared by the C and the architecture specific variants of
* each memory helper. Every 'extern' below declares a function of one of
* these types.
*/
/* Allocator taking a size; presumably returns NULL on failure - TODO confirm */
typedef void *FT_MEM_ALLOC(UWORD32 u4_size);
/* Counterpart of FT_MEM_ALLOC taking the pointer to release */
typedef void FT_MEM_FREE(void *pv_mem);
/* 1D byte copy of 'num_bytes' from pu1_src to pu1_dst */
typedef void FT_MEMCPY(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
/* 2D byte copy: i4_blk_ht rows of i4_blk_wd bytes, each buffer with its own stride */
typedef void FT_COPY_2D(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src,
WORD32 i4_src_stride, WORD32 i4_blk_wd, WORD32 i4_blk_ht);
/* 2D byte fill: i4_blk_ht rows of i4_blk_wd bytes set to u1_val */
typedef void FT_MEMSET_2D(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
WORD32 i4_blk_ht);
/* 1D byte fill of 'num_bytes' with 'value' */
typedef void FT_MEMSET(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
/* 1D fill of 'num_words' 16-bit words with 'value' */
typedef void FT_MEMSET_16BIT(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words);
/* Copy of every alternate 16-bit element of a 2D array; note the (ht, wd) order */
typedef void FT_16BIT_INTERLEAVED_COPY(WORD16 *pi2_src, WORD16 *pi2_dst, WORD32 src_strd,
WORD32 dst_strd, WORD32 ht, WORD32 wd);
/* Fill of every alternate 16-bit element of a 2D array; note the (wd, ht) order, */
/* which is the reverse of FT_16BIT_INTERLEAVED_COPY */
typedef void FT_16BIT_INTERLEAVED_MEMSET(WORD16 *pi2_src, WORD32 i4_src_strd, WORD16 i2_value,
WORD32 i4_wd, WORD32 i4_ht);
/* Returns non-zero when any byte of the u4_wd x u4_ht block is non-zero */
typedef UWORD8 FT_NONZERO_CHECKER(UWORD8 *pu1_data, WORD32 i4_data_strd, UWORD32 u4_wd,
UWORD32 u4_ht);
/* C function declarations */
/* The ih264_* entries are presumably defined by the non-SVC common code - TODO confirm */
extern FT_MEMCPY ih264_memcpy;
extern FT_MEMCPY ih264_memcpy_mul_8;
extern FT_MEMSET ih264_memset;
extern FT_MEMSET ih264_memset_mul_8;
extern FT_MEMSET_16BIT ih264_memset_16bit;
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8;
extern FT_COPY_2D isvc_copy_2d;
extern FT_MEMSET_2D isvc_memset_2d;
/* NOTE(review): no function type or declaration exists here for an 8-bit */
/* interleaved copy (isvc_interleaved_copy) - confirm whether that is intended */
extern FT_16BIT_INTERLEAVED_COPY isvc_16bit_interleaved_copy;
extern FT_16BIT_INTERLEAVED_MEMSET isvc_16bit_interleaved_memset;
extern FT_NONZERO_CHECKER isvc_is_nonzero_blk;
extern FT_MEM_ALLOC isvc_memory_alloc;
extern FT_MEM_FREE isvc_memory_free;
/* A9 Q function declarations */
/* NOTE(review): this is the only 1D helper in the list with an isvc_ prefix; */
/* the other variants are named ih264_* - confirm the symbol name */
extern FT_MEMCPY isvc_memcpy_a9q;
extern FT_MEMCPY ih264_memcpy_mul_8_a9q;
extern FT_MEMSET ih264_memset_a9q;
extern FT_MEMSET ih264_memset_mul_8_a9q;
extern FT_MEMSET_16BIT ih264_memset_16bit_a9q;
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_a9q;
/* AV8 function declarations */
extern FT_MEMCPY ih264_memcpy_av8;
extern FT_MEMCPY ih264_memcpy_mul_8_av8;
extern FT_MEMSET ih264_memset_av8;
extern FT_MEMSET ih264_memset_mul_8_av8;
extern FT_MEMSET_16BIT ih264_memset_16bit_av8;
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_av8;
/* NEON function declarations */
extern FT_MEMSET_2D isvc_memset_2d_neon;
/* SSSE3 variants */
extern FT_MEMCPY ih264_memcpy_mul_8_ssse3;
extern FT_MEMSET ih264_memset_mul_8_ssse3;
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_ssse3;
extern FT_COPY_2D isvc_copy_2d_ssse3;
/* SSE4.2 variants */
extern FT_MEMSET_2D isvc_memset_2d_sse42;
#endif

View file

@ -0,0 +1,840 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* ih264_resi_trans_quant.c
*
* @brief
* Contains function definitions single stage forward transform for H.264
* It will calculate the residue, do the cf and then do quantization
*
* @author
* Ittiam
*
* @par List of Functions:
* - isvc_resi_trans_quant_4x4
* - isvc_resi_trans_quant_chroma_4x4
* - isvc_hadamard_quant_4x4
* - isvc_hadamard_quant_2x2_uv
* - isvc_resi_trans_quant_8x8
*
* @remarks
*******************************************************************************
*/
/* System include files */
#include <stdbool.h>
#include <stddef.h>
/* User include files */
#include "ih264_typedefs.h"
#include "ih264_defs.h"
#include "ih264_size_defs.h"
#include "ih264_macros.h"
#include "ih264_trans_macros.h"
#include "ih264_trans_data.h"
#include "ih264_structs.h"
#include "isvc_trans_quant_itrans_iquant.h"
/* Subtracts the upsampled residual sample from the residue and passes the */
/* difference through CLIP3 with the bounds -UINT8_MAX and +UINT8_MAX */
static FORCEINLINE WORD16 isvc_subtract_upsampled_res(WORD16 i2_residue, WORD16 i2_upsampled_res)
{
    /* The difference is formed in 32 bits so that it cannot wrap before clipping */
    WORD32 i4_diff = i2_residue - i2_upsampled_res;

    return (CLIP3(-((WORD16) UINT8_MAX), ((WORD16) UINT8_MAX), i4_diff));
}
/**
*******************************************************************************
*
* @brief
* This function performs forward transform and quantization on a 4*4 block
*
* @par Description:
* The residue (source - prediction) is computed row by row. When
* 'u1_use_upsampled_res' is non-zero, the co-located upsampled residual sample
* is subtracted as well and the result is clipped by
* isvc_subtract_upsampled_res(). The horizontal transform of each row is
* staged in the output buffer; the vertical transform then reads it back
* column by column and every coefficient is quantized in place with FWD_QUANT.
*
* @param[in] ps_src
* Source sub-block; pv_data is read as UWORD8 and i4_data_stride is the
* distance between rows
*
* @param[in] ps_pred
* Prediction sub-block; pv_data is read as UWORD8 and i4_data_stride is the
* distance between rows
*
* @param[out] ps_out
* Output coefficients; pv_data is written as 16 contiguous WORD16 values
* (4 per row). i4_data_stride of this container is not read.
*
* @param[in] ps_upsampled_res
* Upsampled residual; pv_data is read as WORD16 with a row distance of
* i4_data_stride elements. May be NULL when 'u1_use_upsampled_res' is 0.
*
* @param[in] ps_quant_constants
* Supplies pu2_scale_matrix, pu2_threshold_matrix, u4_qbits (documented as
* QP_BITS_h264_4x4 + floor(QP/6)) and u4_round_factor, all of which are
* forwarded to FWD_QUANT
*
* @param[out] pu1_nnz
* Total non-zero coefficients in the current sub-block, as accumulated by
* FWD_QUANT
*
* @param[out] pi2_dc_out
* Receives the DC coefficient of the transform before it is quantized
*
* @param[in] u1_use_upsampled_res
* Non-zero if the upsampled residual has to be subtracted from the residue.
* 'ps_upsampled_res' must not be NULL in that case.
*
* @returns
* None
*
* @remarks
* None
*
*******************************************************************************
*/
void isvc_resi_trans_quant_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                               buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
                               resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
                               WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
{
    UWORD32 i;
    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
    WORD32 i4_value;

    UWORD8 *pu1_src = ps_src->pv_data;
    UWORD8 *pu1_pred = ps_pred->pv_data;
    WORD16 *pi2_out = ps_out->pv_data;
    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
    WORD16 *pi2_out_tmp = pi2_out;
    UWORD32 u4_nonzero_coeff = 0;
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;

    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        /* computing prediction error (residue) */
        x4 = pu1_src[0] - pu1_pred[0];
        x5 = pu1_src[1] - pu1_pred[1];
        x6 = pu1_src[2] - pu1_pred[2];
        x7 = pu1_src[3] - pu1_pred[3];

        if(u1_use_upsampled_res)
        {
            x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]);
            x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]);
            x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]);
            x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]);

            /* Advanced only when in use: the pointer is NULL when no upsampled */
            /* residual is supplied, and arithmetic on a null pointer is undefined */
            pi2_upsampled_res += i4_upsampled_res_stride;
        }

        /* Horizontal transform */
        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        pi2_out_tmp[0] = x0 + x1;
        pi2_out_tmp[1] = (x3 << 1) + x2;
        pi2_out_tmp[2] = x0 - x1;
        pi2_out_tmp[3] = x3 - (x2 << 1);

        /* pointing to next row; */
        pu1_src += i4_src_stride;
        pu1_pred += i4_pred_stride;
        pi2_out_tmp += 4;
    }

    pi2_out_tmp = pi2_out;

    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        /* Vertical transform and quantization */
        x4 = pi2_out_tmp[0];
        x5 = pi2_out_tmp[4];
        x6 = pi2_out_tmp[8];
        x7 = pi2_out_tmp[12];

        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        /* quantization is done in place */
        i4_value = x0 + x1;

        /* First column, first row: this is the DC term, reported unquantized */
        if(i == 0)
        {
            (*pi2_dc_out) = i4_value;
        }

        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[0] = i4_value;

        i4_value = (x3 << 1) + x2;
        FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[4] = i4_value;

        i4_value = x0 - x1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[8] = i4_value;

        i4_value = x3 - (x2 << 1);
        FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor,
                  u4_qbits, u4_nonzero_coeff);
        pi2_out_tmp[12] = i4_value;

        /* Next column of the block and of both quantization matrices */
        pi2_out_tmp++;
        pu2_scale_matrix++;
        pu2_threshold_matrix++;
    }

    /* Return total nonzero coefficients in the current sub block */
    *pu1_nnz = u4_nonzero_coeff;
}
/**
*******************************************************************************
*
* @brief
* This function performs forward transform and quantization on a 4*4 chroma
* block with interleaved values
*
* @par Description:
* The residue (source - prediction) is computed row by row from every second
* byte of the source and prediction rows (offsets 0, 2, 4 and 6). When
* 'u1_use_upsampled_res' is non-zero, an upsampled residual sample is
* subtracted as well and the result is clipped by
* isvc_subtract_upsampled_res(). The horizontal transform of each row is
* staged in the output buffer; the vertical transform then reads it back
* column by column and every coefficient is quantized in place with FWD_QUANT.
*
* @param[in] ps_src
* Source sub-block; pv_data is read as UWORD8 at alternate offsets and
* i4_data_stride is the distance between rows
*
* @param[in] ps_pred
* Prediction sub-block; pv_data is read as UWORD8 at alternate offsets and
* i4_data_stride is the distance between rows
*
* @param[out] ps_out
* Output coefficients; pv_data is written as 16 contiguous (not interleaved)
* WORD16 values, 4 per row. i4_data_stride of this container is not read.
*
* @param[in] ps_upsampled_res
* Upsampled residual; pv_data is read as WORD16 with a row distance of
* i4_data_stride elements. May be NULL when 'u1_use_upsampled_res' is 0.
*
* @param[in] ps_quant_constants
* Supplies pu2_scale_matrix, pu2_threshold_matrix, u4_qbits (documented as
* QP_BITS_h264_4x4 + floor(QP/6)) and u4_round_factor, all of which are
* forwarded to FWD_QUANT
*
* @param[out] pu1_nnz
* Total non-zero coefficients in the current sub-block, as accumulated by
* FWD_QUANT
*
* @param[out] pi2_dc_out
* Receives the DC coefficient of the transform before it is quantized
*
* @param[in] u1_use_upsampled_res
* Non-zero if the upsampled residual has to be subtracted from the residue.
* 'ps_upsampled_res' must not be NULL in that case.
*
* @returns
* None
*
* @remarks
* None
*
*******************************************************************************
*/
void isvc_resi_trans_quant_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                                      buffer_container_t *ps_out,
                                      buffer_container_t *ps_upsampled_res,
                                      resi_trans_quant_constants_t *ps_quant_constants,
                                      UWORD8 *pu1_nnz, WORD16 *pi2_dc_out,
                                      UWORD8 u1_use_upsampled_res)
{
    UWORD32 i;
    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
    WORD32 i4_value;

    UWORD8 *pu1_src = ps_src->pv_data;
    UWORD8 *pu1_pred = ps_pred->pv_data;
    WORD16 *pi2_out = ps_out->pv_data;
    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
    WORD16 *pi2_out_tmp = pi2_out;
    UWORD32 u4_nonzero_coeff = 0;
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;

    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        /* computing prediction error (residue) from the interleaved samples */
        x4 = pu1_src[0] - pu1_pred[0];
        x5 = pu1_src[2] - pu1_pred[2];
        x6 = pu1_src[4] - pu1_pred[4];
        x7 = pu1_src[6] - pu1_pred[6];

        if(u1_use_upsampled_res)
        {
            /* NOTE(review): source and prediction are read at interleaved offsets */
            /* 0/2/4/6 but the upsampled residual at 0/1/2/3. Confirm that the */
            /* upsampled chroma residual buffer is not interleaved. */
            x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]);
            x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]);
            x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]);
            x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]);

            /* Advanced only when in use: the pointer is NULL when no upsampled */
            /* residual is supplied, and arithmetic on a null pointer is undefined */
            pi2_upsampled_res += i4_upsampled_res_stride;
        }

        /* Horizontal transform */
        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        pi2_out_tmp[0] = x0 + x1;
        pi2_out_tmp[1] = (x3 << 1) + x2;
        pi2_out_tmp[2] = x0 - x1;
        pi2_out_tmp[3] = x3 - (x2 << 1);

        /* pointing to next row; */
        pu1_src += i4_src_stride;
        pu1_pred += i4_pred_stride;
        pi2_out_tmp += 4;
    }

    pi2_out_tmp = pi2_out;

    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        /* Vertical transform and quantization */
        x4 = pi2_out_tmp[0];
        x5 = pi2_out_tmp[4];
        x6 = pi2_out_tmp[8];
        x7 = pi2_out_tmp[12];

        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        /* quantization is done in place */
        i4_value = x0 + x1;

        /* First column, first row: this is the DC term, reported unquantized */
        if(i == 0)
        {
            *pi2_dc_out = i4_value;
        }

        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[0] = i4_value;

        i4_value = (x3 << 1) + x2;
        FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[4] = i4_value;

        i4_value = x0 - x1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[8] = i4_value;

        i4_value = x3 - (x2 << 1);
        FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor,
                  u4_qbits, u4_nonzero_coeff);
        pi2_out_tmp[12] = i4_value;

        /* Next column of the block and of both quantization matrices */
        pi2_out_tmp++;
        pu2_scale_matrix++;
        pu2_threshold_matrix++;
    }

    /* Return total nonzero coefficients in the current sub block */
    *pu1_nnz = u4_nonzero_coeff;
}
/**
*******************************************************************************
*
* @brief
* This function performs forward hadamard transform and quantization on a 4*4
* block
*
* @par Description:
* A butterfly made only of additions and subtractions is applied to each of
* the four rows of 'pi2_src' and the result is stored in 'pi2_dst'. The same
* butterfly is then applied to each column of 'pi2_dst'; every output is
* halved (arithmetic shift right by one) and quantized in place with
* FWD_QUANT.
*
* @param[in] pi2_src
* Pointer to the 16 contiguous input values (4 per row)
*
* @param[out] pi2_dst
* Pointer to the 16 contiguous output values (4 per row)
*
* @param[in] ps_quant_constants
* Supplies pu2_scale_matrix, pu2_threshold_matrix, u4_qbits and
* u4_round_factor. Only element 0 of both matrices is used, for every
* coefficient.
*
* @param[out] pu1_nnz
* Reset to 0 and then used as the counter passed to FWD_QUANT (documented as
* the total non-zero coefficients in the current sub-block)
*
* @returns
* None
*
* @remarks
* None
*
*/
void isvc_hadamard_quant_4x4(WORD16 *pi2_src, WORD16 *pi2_dst,
                             resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz)
{
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
    WORD32 i4_idx;

    *pu1_nnz = 0;

    /* Horizontal pass: one butterfly per row */
    for(i4_idx = 0; i4_idx < SUB_BLK_WIDTH_4x4; i4_idx++)
    {
        WORD16 *pi2_in_row = pi2_src + 4 * i4_idx;
        WORD16 *pi2_out_row = pi2_dst + 4 * i4_idx;

        /* All four inputs are read before any output of the row is written */
        WORD32 i4_in0 = pi2_in_row[0];
        WORD32 i4_in1 = pi2_in_row[1];
        WORD32 i4_in2 = pi2_in_row[2];
        WORD32 i4_in3 = pi2_in_row[3];

        WORD32 i4_outer_sum = i4_in0 + i4_in3;
        WORD32 i4_inner_sum = i4_in1 + i4_in2;
        WORD32 i4_inner_diff = i4_in1 - i4_in2;
        WORD32 i4_outer_diff = i4_in0 - i4_in3;

        pi2_out_row[0] = i4_outer_sum + i4_inner_sum;
        pi2_out_row[1] = i4_outer_diff + i4_inner_diff;
        pi2_out_row[2] = i4_outer_sum - i4_inner_sum;
        pi2_out_row[3] = i4_outer_diff - i4_inner_diff;
    }

    /* Vertical transform and quantization: one butterfly per column */
    for(i4_idx = 0; i4_idx < SUB_BLK_WIDTH_4x4; i4_idx++)
    {
        WORD16 *pi2_col = pi2_dst + i4_idx;
        WORD32 i4_value;

        WORD32 i4_in0 = pi2_col[0];
        WORD32 i4_in1 = pi2_col[4];
        WORD32 i4_in2 = pi2_col[8];
        WORD32 i4_in3 = pi2_col[12];

        WORD32 i4_outer_sum = i4_in0 + i4_in3;
        WORD32 i4_inner_sum = i4_in1 + i4_in2;
        WORD32 i4_inner_diff = i4_in1 - i4_in2;
        WORD32 i4_outer_diff = i4_in0 - i4_in3;

        i4_value = (i4_outer_sum + i4_inner_sum) >> 1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[0]);
        pi2_col[0] = i4_value;

        i4_value = (i4_outer_diff + i4_inner_diff) >> 1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[0]);
        pi2_col[4] = i4_value;

        i4_value = (i4_outer_sum - i4_inner_sum) >> 1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[0]);
        pi2_col[8] = i4_value;

        i4_value = (i4_outer_diff - i4_inner_diff) >> 1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[0]);
        pi2_col[12] = i4_value;
    }
}
/**
*******************************************************************************
*
* @brief
* This function performs forward hadamard transform and quantization on a 2*2
* block for both U and V planes
*
* @par Description:
* The input holds two groups of four values, one group per plane. For each
* group, the 2x2 transform is computed with additions and subtractions only
* and every output is quantized with FWD_QUANT.
*
* @param[in] pi2_src
* Pointer to the 8 input values: 4 for the first plane followed by 4 for the
* second plane
*
* @param[out] pi2_dst
* Pointer to the 8 output values, laid out like the input
*
* @param[in] ps_quant_constants
* Supplies pu2_scale_matrix, pu2_threshold_matrix, u4_qbits and
* u4_round_factor. Only element 0 of both matrices is used, for every
* coefficient.
*
* @param[out] pu1_nnz
* Two counters, one per plane; each one is reset to 0 and then passed to
* FWD_QUANT for the four coefficients of its plane
*
* @returns
* None
*
* @remarks
* The counters are written at positions 0 and 1 of pu1_nnz
*
*/
void isvc_hadamard_quant_2x2_uv(WORD16 *pi2_src, WORD16 *pi2_dst,
                                resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz)
{
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
    WORD32 plane;

    for(plane = 0; plane < 2; plane++)
    {
        WORD16 *pi2_in = pi2_src + 4 * plane;
        WORD16 *pi2_out = pi2_dst + 4 * plane;
        WORD32 i4_in0, i4_in1, i4_in2, i4_in3;
        WORD32 i4_top_sum, i4_top_diff, i4_bot_sum, i4_bot_diff;
        WORD32 i4_value;

        pu1_nnz[plane] = 0;

        /* Horizontal transform; all inputs are read before any output is written */
        i4_in0 = pi2_in[0];
        i4_in1 = pi2_in[1];
        i4_in2 = pi2_in[2];
        i4_in3 = pi2_in[3];

        i4_top_sum = i4_in0 + i4_in1;
        i4_top_diff = i4_in0 - i4_in1;
        i4_bot_sum = i4_in2 + i4_in3;
        i4_bot_diff = i4_in2 - i4_in3;

        /* Vertical transform and quantization */
        i4_value = (i4_top_sum + i4_bot_sum);
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[plane]);
        pi2_out[0] = i4_value;

        i4_value = (i4_top_sum - i4_bot_sum);
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[plane]);
        pi2_out[2] = i4_value;

        i4_value = (i4_top_diff - i4_bot_diff);
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[plane]);
        pi2_out[3] = i4_value;

        i4_value = (i4_top_diff + i4_bot_diff);
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[plane]);
        pi2_out[1] = i4_value;
    }
}
/**
*******************************************************************************
*
* @brief
* This function performs Single stage forward transform CF8 and quantization
* on 8*8 blocks for h.264
*
* @par Description:
* The residue (source - prediction) is computed row by row. When
* 'u1_use_upsampled_res' is non-zero, the co-located upsampled residual sample
* is subtracted as well and the result is clipped by
* isvc_subtract_upsampled_res(). The horizontal 8-point transform of each row
* is staged in the output buffer; the vertical transform then reads it back
* column by column and every coefficient is quantized in place with FWD_QUANT.
*
* @param[in] ps_src
* Input 8x8 pixels; pv_data is read as UWORD8 and i4_data_stride is the
* distance between rows
*
* @param[in] ps_pred
* Prediction 8x8 pixels; pv_data is read as UWORD8 and i4_data_stride is the
* distance between rows
*
* @param[out] ps_out
* Output coefficients; pv_data is written as 64 contiguous WORD16 values
* (8 per row). i4_data_stride of this container is not read.
*
* @param[in] ps_upsampled_res
* Upsampled residual; pv_data is read as WORD16 with a row distance of
* i4_data_stride elements. May be NULL when 'u1_use_upsampled_res' is 0.
*
* @param[in] ps_quant_constants
* Supplies pu2_scale_matrix, pu2_threshold_matrix (64 entries each are
* indexed), u4_qbits and u4_round_factor, all of which are forwarded to
* FWD_QUANT
*
* @param[out] pu1_nnz
* Total non-zero coefficients in the current block, as accumulated by
* FWD_QUANT
*
* @param[in] pi2_dc_out
* Unused by this function
*
* @param[in] u1_use_upsampled_res
* Non-zero if the upsampled residual has to be subtracted from the residue.
* 'ps_upsampled_res' must not be NULL in that case.
*
* @returns Void
*
*
*******************************************************************************
*/
void isvc_resi_trans_quant_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                               buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
                               resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
                               WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
{
    UWORD32 i;
    WORD32 a0, a1, a2, a3, a4, a5, a6, a7;
    WORD32 r0, r1, r2, r3, r4, r5, r6, r7;

    UWORD8 *pu1_src = ps_src->pv_data;
    UWORD8 *pu1_pred = ps_pred->pv_data;
    WORD16 *pi2_out = ps_out->pv_data;
    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
    WORD16 *pi2_out_tmp = pi2_out;
    UWORD32 u4_nonzero_coeff = 0;
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;

    UNUSED(pi2_dc_out);

    /* Horizontal transform */
    /* The a's and r's are reused for several intermediate values to avoid */
    /* declaring more variables */
    for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
    {
        /* residue of the current row */
        r0 = pu1_src[0];
        r0 -= pu1_pred[0];
        r1 = pu1_src[1];
        r1 -= pu1_pred[1];
        r2 = pu1_src[2];
        r2 -= pu1_pred[2];
        r3 = pu1_src[3];
        r3 -= pu1_pred[3];
        r4 = pu1_src[4];
        r4 -= pu1_pred[4];
        r5 = pu1_src[5];
        r5 -= pu1_pred[5];
        r6 = pu1_src[6];
        r6 -= pu1_pred[6];
        r7 = pu1_src[7];
        r7 -= pu1_pred[7];

        if(u1_use_upsampled_res)
        {
            r0 = isvc_subtract_upsampled_res(r0, pi2_upsampled_res[0]);
            r1 = isvc_subtract_upsampled_res(r1, pi2_upsampled_res[1]);
            r2 = isvc_subtract_upsampled_res(r2, pi2_upsampled_res[2]);
            r3 = isvc_subtract_upsampled_res(r3, pi2_upsampled_res[3]);
            r4 = isvc_subtract_upsampled_res(r4, pi2_upsampled_res[4]);
            r5 = isvc_subtract_upsampled_res(r5, pi2_upsampled_res[5]);
            r6 = isvc_subtract_upsampled_res(r6, pi2_upsampled_res[6]);
            r7 = isvc_subtract_upsampled_res(r7, pi2_upsampled_res[7]);

            /* Advanced only when in use: the pointer is NULL when no upsampled */
            /* residual is supplied, and arithmetic on a null pointer is undefined */
            pi2_upsampled_res += i4_upsampled_res_stride;
        }

        /* even outputs, from the sums of mirrored samples */
        a0 = r0 + r7;
        a1 = r1 + r6;
        a2 = r2 + r5;
        a3 = r3 + r4;

        a4 = a0 + a3;
        a5 = a1 + a2;
        a6 = a0 - a3;
        a7 = a1 - a2;

        pi2_out_tmp[0] = a4 + a5;
        pi2_out_tmp[2] = a6 + (a7 >> 1);
        pi2_out_tmp[4] = a4 - a5;
        pi2_out_tmp[6] = (a6 >> 1) - a7;

        /* odd outputs, from the differences of mirrored samples */
        a0 = r0 - r7;
        a1 = r1 - r6;
        a2 = r2 - r5;
        a3 = r3 - r4;

        a4 = a1 + a2 + ((a0 >> 1) + a0);
        a5 = a0 - a3 - ((a2 >> 1) + a2);
        a6 = a0 + a3 - ((a1 >> 1) + a1);
        a7 = a1 - a2 + ((a3 >> 1) + a3);

        pi2_out_tmp[1] = a4 + (a7 >> 2);
        pi2_out_tmp[3] = a5 + (a6 >> 2);
        pi2_out_tmp[5] = a6 - (a5 >> 2);
        pi2_out_tmp[7] = (a4 >> 2) - a7;

        /* pointing to next row */
        pu1_src += i4_src_stride;
        pu1_pred += i4_pred_stride;
        pi2_out_tmp += 8;
    }

    /* vertical transform and quant */
    pi2_out_tmp = pi2_out;

    for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
    {
        r0 = pi2_out_tmp[0];
        r1 = pi2_out_tmp[8];
        r2 = pi2_out_tmp[16];
        r3 = pi2_out_tmp[24];
        r4 = pi2_out_tmp[32];
        r5 = pi2_out_tmp[40];
        r6 = pi2_out_tmp[48];
        r7 = pi2_out_tmp[56];

        a0 = r0 + r7;
        a1 = r1 + r6;
        a2 = r2 + r5;
        a3 = r3 + r4;

        a4 = a0 + a3;
        a5 = a1 + a2;
        a6 = a0 - a3;
        a7 = a1 - a2;

        /* the differences must be taken before r0..r7 are overwritten below */
        a0 = r0 - r7;
        a1 = r1 - r6;
        a2 = r2 - r5;
        a3 = r3 - r4;

        r0 = a4 + a5;
        r2 = a6 + (a7 >> 1);
        r4 = a4 - a5;
        r6 = (a6 >> 1) - a7;

        a4 = a1 + a2 + ((a0 >> 1) + a0);
        a5 = a0 - a3 - ((a2 >> 1) + a2);
        a6 = a0 + a3 - ((a1 >> 1) + a1);
        a7 = a1 - a2 + ((a3 >> 1) + a3);

        r1 = a4 + (a7 >> 2);
        r3 = a5 + (a6 >> 2);
        r5 = a6 - (a5 >> 2);
        r7 = (a4 >> 2) - a7;

        FWD_QUANT(r0, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[0] = r0;

        FWD_QUANT(r1, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[8] = r1;

        FWD_QUANT(r2, pu2_threshold_matrix[16], pu2_scale_matrix[16], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[16] = r2;

        FWD_QUANT(r3, pu2_threshold_matrix[24], pu2_scale_matrix[24], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[24] = r3;

        FWD_QUANT(r4, pu2_threshold_matrix[32], pu2_scale_matrix[32], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[32] = r4;

        FWD_QUANT(r5, pu2_threshold_matrix[40], pu2_scale_matrix[40], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[40] = r5;

        FWD_QUANT(r6, pu2_threshold_matrix[48], pu2_scale_matrix[48], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[48] = r6;

        FWD_QUANT(r7, pu2_threshold_matrix[56], pu2_scale_matrix[56], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[56] = r7;

        /* Next column of the block and of both quantization matrices */
        pi2_out_tmp++;
        pu2_scale_matrix++;
        pu2_threshold_matrix++;
    }

    /* Return total nonzero coefficients in the current sub block */
    *pu1_nnz = u4_nonzero_coeff;
}

335
common/svc/isvc_structs.h Normal file
View file

@ -0,0 +1,335 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_structs.h
*
* @brief
* Contains struct definition used for SVC
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVC_STRUCTS_H_
#define _ISVC_STRUCTS_H_
#include "ih264_typedefs.h"
#include "iv2.h"
#include "ih264_defs.h"
#include "ih264_structs.h"
#include "isvc_defs.h"
/* Untyped 2D buffer descriptor: a data pointer together with its row stride */
typedef struct buffer_container_t
{
/* Start of the data; users cast it to the element type they expect */
void *pv_data;
/* Distance between the starts of consecutive rows; presumably counted in */
/* elements of the type pv_data is cast to - TODO confirm against users */
WORD32 i4_data_stride;
} buffer_container_t;
/* Properties of a picture buffer: one buffer_container_t per component, */
/* together with colour format, dimensions and bit depth */
typedef struct yuv_buf_props_t
{
/* Data pointer and stride for each of the NUM_COMPONENTS components */
buffer_container_t as_component_bufs[NUM_COMPONENTS];
/* Colour format of the buffer */
IV_COLOR_FORMAT_T e_color_format;
/* Width of the picture (presumably in luma samples - TODO confirm) */
UWORD32 u4_width;
/* Height of the picture (presumably in luma samples - TODO confirm) */
UWORD32 u4_height;
/* Bit depth of the samples */
UWORD8 u1_bit_depth;
} yuv_buf_props_t;
/* NAL unit header fields; members are named after the corresponding */
/* H.264 syntax elements */
typedef struct nal_unit_header_t
{
/* nal_ref_idc */
UWORD8 u1_nal_ref_idc;
/* nal_unit_type */
UWORD8 u1_nal_unit_type;
} nal_unit_header_t;
/* A signed 2D coordinate pair */
typedef struct coordinates_t
{
/* Horizontal (x) coordinate */
WORD32 i4_abscissa;
/* Vertical (y) coordinate */
WORD32 i4_ordinate;
} coordinates_t;
/* Book-keeping for one picture buffer set */
/* NOTE(review): 'au' presumably stands for access unit - confirm */
typedef struct svc_au_buf_t
{
/* Array of structs that contain properties of the per-layer YUV buffers */
/* (presumably one entry per spatial layer - TODO confirm) */
yuv_buf_props_t *ps_layer_yuv_buf_props;
/* Temporal ID */
WORD8 i1_temporal_id;
/* Num Spatial Layers */
UWORD8 u1_num_spatial_layers;
/* Resolution ratio b/w spatial layers */
DOUBLE d_spatial_res_ratio;
/* absolute value of POC */
WORD32 i4_abs_poc;
/* POC % MaxPicOrderCntLSB */
WORD32 i4_poc_lsb;
/* Lower 32 bits of time stamp */
UWORD32 u4_timestamp_low;
/* Higher 32 bits of time stamp */
UWORD32 u4_timestamp_high;
/* Is Pic used as refPic for future frames? */
WORD32 i4_used_as_ref;
/* frame_num in the slice header */
WORD32 i4_frame_num;
/*
* 0: Top Field
* 1: Bottom Field
*/
WORD8 i1_field_type;
/* buffer ID from frame buffer manager */
WORD32 i4_buf_id;
} svc_au_buf_t;
/* NAL unit header together with the SVC extension fields; each member is */
/* named after the syntax element quoted in its comment */
typedef struct svc_nalu_ext_t
{
/* nal_ref_idc and nal_unit_type of the NAL unit */
nal_unit_header_t s_nalu_header;
/* idr_flag */
UWORD8 u1_idr_flag;
/* priority_id (Range = [0, 63]) */
UWORD8 u1_priority_id;
/* no_inter_layer_pred_flag */
UWORD8 u1_no_inter_layer_pred_flag;
/* dependency_id (Range = [0, 7]) */
UWORD8 u1_dependency_id;
/* quality_id (Range = [0, 15]) */
UWORD8 u1_quality_id;
/* temporal_id (Range = [0, 7]) */
UWORD8 u1_temporal_id;
/* use_ref_base_pic_flag */
UWORD8 u1_use_ref_base_pic_flag;
/* discardable_flag */
UWORD8 u1_discardable_flag;
/* output_flag */
UWORD8 u1_output_flag;
/* reserved_three_2bits */
UWORD8 u1_reserved_three_2bits;
} svc_nalu_ext_t;
/* SVC VUI extension parameters. Every array holds one value per entry and */
/* is dimensioned for MAX_VUI_EXT_NUM_ENTRIES entries */
typedef struct svc_vui_ext_t
{
/* specifies the maximum layers in the SVC bitstream */
UWORD32 u4_vui_ext_num_entries_minus1;
/* specifies the dependency ID for each layer */
UWORD8 u1_vui_ext_dependency_id[MAX_VUI_EXT_NUM_ENTRIES];
/* specifies the quality ID for each layer */
UWORD8 u1_vui_ext_quality_id[MAX_VUI_EXT_NUM_ENTRIES];
/* specifies the temporal ID for each layer */
UWORD8 u1_vui_ext_temporal_id[MAX_VUI_EXT_NUM_ENTRIES];
/* specifies the timing_info_present_flag value of the i-th sub-bitstream */
UWORD8 u1_vui_ext_timing_info_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
/* specifies the num_units_in_tick value of the i-th sub-bitstream */
UWORD32 u4_vui_ext_num_units_in_tick[MAX_VUI_EXT_NUM_ENTRIES];
/* specifies the time_scale value of the i-th sub-bitstream */
UWORD32 u4_vui_ext_time_scale[MAX_VUI_EXT_NUM_ENTRIES];
/* specifies the fixed_frame_rate_flag value of the i-th sub-bitstream */
UWORD8 u1_vui_ext_fixed_frame_rate_flag[MAX_VUI_EXT_NUM_ENTRIES];
/* specifies the nal_hrd_parameters_present_flag value of the i-th */
/* sub-bitstream */
UWORD8 u1_vui_ext_nal_hrd_params_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
/* specifies the vcl_hrd_parameters_present_flag value of the i-th */
/* sub-bitstream */
UWORD8 u1_vui_ext_vcl_hrd_params_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
/* specifies the low_delay_hrd_flag value of the i-th sub-bitstream */
UWORD8 u1_vui_ext_low_delay_hrd_flag[MAX_VUI_EXT_NUM_ENTRIES];
/* specifies the pic_struct_present_flag value of the i-th sub-bitstream */
UWORD8 u1_vui_ext_pic_struct_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
} svc_vui_ext_t;
/* SVC extension fields of the sequence parameter set; each member is named */
/* after the syntax element quoted in its comment */
typedef struct sps_svc_ext_t
{
/* inter_layer_deblocking_filter_control_present_flag */
UWORD8 u1_inter_layer_deblocking_filter_control_present_flag;
/* extended_spatial_scalability_idc */
UWORD8 u1_extended_spatial_scalability_idc;
/* chroma_phase_x_plus1_flag */
UWORD8 u1_chroma_phase_x_plus1;
/* chroma_phase_y_plus1 */
UWORD8 u1_chroma_phase_y_plus1;
/* seq_ref_layer_chroma_phase_x_plus1_flag */
UWORD8 u1_seq_ref_layer_chroma_phase_x_plus1_flag;
/* seq_ref_layer_chroma_phase_y_plus1 */
UWORD8 u1_seq_ref_layer_chroma_phase_y_plus1;
/* seq_scaled_ref_layer_left_offset */
WORD32 i4_seq_scaled_ref_layer_left_offset;
/* seq_scaled_ref_layer_top_offset */
WORD32 i4_seq_scaled_ref_layer_top_offset;
/* seq_scaled_ref_layer_right_offset */
WORD32 i4_seq_scaled_ref_layer_right_offset;
/* seq_scaled_ref_layer_bottom_offset */
WORD32 i4_seq_scaled_ref_layer_bottom_offset;
/* seq_tcoeff_level_prediction_flag */
WORD8 i1_seq_tcoeff_level_prediction_flag;
/* adaptive_tcoeff_level_prediction_flag */
WORD8 i1_adaptive_tcoeff_level_prediction_flag;
/* slice_header_restriction_flag */
WORD8 i1_slice_header_restriction_flag;
} sps_svc_ext_t;
/* Subset sequence parameter set: a regular SPS followed by the SVC SPS */
/* extension and the SVC VUI extension */
typedef struct subset_sps_t
{
/* SPS structure */
sps_t s_sps;
/* Structure containing flags specific to SVC SPS */
sps_svc_ext_t s_sps_svc_ext;
/* svc_vui_parameters_present_flag */
WORD8 i1_svc_vui_parameters_present_flag;
/* SVC VUI extension parameters */
svc_vui_ext_t s_svc_vui;
/* additional_extension2_data_flag */
WORD8 i1_additional_extension2_flag;
} subset_sps_t;
/* SVC slice header fields, followed by the regular slice header; each */
/* member is named after the syntax element quoted in its comment */
typedef struct svc_slice_header_t
{
/* ref_layer_dq_id */
UWORD32 u4_ref_layer_dq_id;
/* disable_inter_layer_deblocking_filter_idc */
UWORD32 u4_disable_inter_layer_deblocking_filter_idc;
/* inter_layer_slice_alpha_c0_offset_div2 */
WORD32 i4_inter_layer_slice_alpha_c0_offset_div2;
/* inter_layer_slice_beta_offset_div2 */
WORD32 i4_inter_layer_slice_beta_offset_div2;
/* constrained_intra_resampling_flag */
WORD8 i1_constrained_intra_resampling_flag;
/* ref_layer_chroma_phase_x_plus1_flag */
WORD8 i1_ref_layer_chroma_phase_x_plus1_flag;
/* ref_layer_chroma_phase_y_plus1 */
WORD8 i1_ref_layer_chroma_phase_y_plus1;
/* scaled_ref_layer_left_offset */
WORD32 i4_scaled_ref_layer_left;
/* scaled_ref_layer_top_offset */
WORD32 i4_scaled_ref_layer_top;
/* scaled_ref_layer_right_offset */
WORD32 i4_scaled_ref_layer_right;
/* scaled_ref_layer_bottom_offset */
WORD32 i4_scaled_ref_layer_bottom;
/* slice_skip_flag */
WORD8 i1_slice_skip_flag;
/* num_mbs_in_slice_minus1 */
UWORD32 u4_num_mbs_in_slice_minus1;
/* adaptive_base_mode_flag */
WORD8 i1_adaptive_base_mode_flag;
/* default_base_mode_flag */
WORD8 i1_default_base_mode_flag;
/* adaptive_motion_prediction_flag */
WORD8 i1_adaptive_motion_prediction_flag;
/* default_motion_prediction_flag */
WORD8 i1_default_motion_prediction_flag;
/* adaptive_residual_prediction_flag */
WORD8 i1_adaptive_residual_prediction_flag;
/* default_residual_prediction_flag */
WORD8 i1_default_residual_prediction_flag;
/* tcoeff_level_prediction_flag */
WORD8 i1_tcoeff_level_prediction_flag;
/* scan_idx_start */
UWORD32 u4_scan_idx_start;
/* scan_idx_end */
UWORD32 u4_scan_idx_end;
/* store_ref_base_pic_flag */
WORD32 i4_store_ref_base_pic_flag;
/* Regular (non-SVC) slice header structure */
slice_header_t s_slice_header;
} svc_slice_header_t;
#endif

View file

@ -0,0 +1,253 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_trans_quant.h
*
* @brief
* Contains declarations for forward and inverse transform paths for H264
*
* @author
* Ittiam
*
* @remarks
*
*******************************************************************************
*/
#ifndef _ISVC_TRANS_QUANT_ITRANS_IQUANT_H_
#define _ISVC_TRANS_QUANT_ITRANS_IQUANT_H_
#include <stdint.h>
#include "ih264_typedefs.h"
#include "ih264_debug.h"
#include "ih264_macros.h"
#include "isvc_macros.h"
#include "isvc_structs.h"
/* With and without residual_pred use. Matches the index range {0, 1} that */
/* isvc_get_resi_trans_quant_variant_idx() returns for a 0/1 flag */
#define NUM_RESI_TRANS_QUANT_VARIANTS 2
/* Matches the index range {0, 1, 2} that isvc_get_iq_it_recon_variant_idx() */
/* returns for 0/1 flags; the combination intra = 1 with res_accumulate = 1 */
/* (which would give 3) is rejected by an ASSERT in that function */
#define NUM_IQ_IT_RECON_VARIANTS 3
/* Structs */
/* Quantisation parameters handed as one unit to the FT_RESI_TRANS_QUANT and */
/* FT_HADAMARD_QUANT functions */
typedef struct resi_trans_quant_constants_t
{
/* Scale matrix used for quantisation */
const UWORD16 *pu2_scale_matrix;
/* Threshold matrix used for quantisation */
const UWORD16 *pu2_threshold_matrix;
/* qbits value used for quantisation */
UWORD32 u4_qbits;
/* Rounding factor used for quantisation */
UWORD32 u4_round_factor;
} resi_trans_quant_constants_t;
/* Inverse-quantisation parameters handed as one unit to the FT_IQ_IT_RECON */
/* functions */
typedef struct iq_it_res_rec_constants_t
{
/* Inverse scale matrix */
const UWORD16 *pu2_iscal_mat;
/* Weighting matrix */
const UWORD16 *pu2_weigh_mat;
/* Quantisation parameter divided by 6 (as the member name states) */
UWORD32 u4_qp_div_6;
} iq_it_res_rec_constants_t;
/* Typedefs */
/* Function type taking raw source/prediction pointers, strides and the */
/* individual quantisation parameters. No function declared in this header */
/* uses it */
typedef void FT_RESI_TRANS_DCTRANS_QUANT(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out,
WORD32 src_strd, WORD32 pred_strd, WORD32 dst_strd,
const UWORD16 *pu2_scale_mat,
const UWORD16 *pu2_thresh_mat, UWORD32 u4_qbit,
UWORD32 u4_round_fact, UWORD8 *pu1_nnz);
/* Function type taking raw coefficient/prediction/output pointers, strides */
/* and the individual inverse-quantisation parameters. No function declared */
/* in this header uses it */
typedef void FT_IDCTRANS_IQUANT_ITRANS_RECON(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out,
WORD32 src_strd, WORD32 pred_strd, WORD32 out_strd,
const UWORD16 *pu2_iscale_mat,
const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
UWORD32 pi4_cntrl, WORD32 *pi4_tmp);
/* Function type of the isvc_resi_trans_quant_* functions declared below. */
/* Buffers are passed as buffer_container_t and the quantisation parameters */
/* as resi_trans_quant_constants_t; u1_use_upsampled_res is the flag that */
/* isvc_get_resi_trans_quant_variant_idx() maps to a variant index */
typedef void FT_RESI_TRANS_QUANT(buffer_container_t *ps_src, buffer_container_t *ps_pred,
buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res);
/* Function type of isvc_luma_16x16_resi_trans_dctrans_quant_a9 */
typedef void FT_LUMA_16X16_RESI_TRANS_DCTRANS_QUANT(
UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix,
UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag);
/* Function type of isvc_chroma_8x8_resi_trans_dctrans_quant_a9 */
typedef void FT_CHROMA_8X8_RESI_TRANS_DCTRANS_QUANT(
UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix,
UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 *pu1_nnz);
/* Function type of the isvc_*iquant_itrans_recon_* functions declared below. */
/* Buffers are passed as buffer_container_t and the inverse-quantisation */
/* parameters as iq_it_res_rec_constants_t */
typedef void FT_IQ_IT_RECON(buffer_container_t *ps_src, buffer_container_t *ps_pred,
buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
buffer_container_t *ps_rec,
iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp,
WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate);
/* Function type of isvc_luma_16x16_idctrans_iquant_itrans_recon_a9 */
typedef void FT_LUMA_16X16_IDCTRANS_IQUANT_ITRANS_RECON(
WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
WORD32 out_strd, const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
UWORD32 pi4_cntrl, UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp);
/* Function type of isvc_chroma_8x8_idctrans_iquant_itrans_recon_a9 */
typedef void FT_CHROMA_8X8_IDCTRANS_IQUANT_ITRANS_RECON(
WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
WORD32 out_strd, const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
UWORD32 pi4_cntrl, WORD32 *pi4_tmp);
/* Function type of the ih264_ihadamard_scaling_* functions declared below */
typedef void FT_IHADAMARD_SCALING(WORD16 *pi2_src, WORD16 *pi2_out, const UWORD16 *pu2_iscal_mat,
const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6,
WORD32 *pi4_tmp);
/* Function type of the isvc_hadamard_quant_* functions declared below */
typedef void FT_HADAMARD_QUANT(WORD16 *pi2_src, WORD16 *pi2_dst,
resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz);
/*****************************************************************************/
/* Extern Function Declarations */
/*****************************************************************************/
/* Declarations without an architecture suffix */
/* NOTE(review): presumably the portable C implementations - confirm */
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4;
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4;
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_8x8;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc;
extern FT_IQ_IT_RECON isvc_zcbf_iquant_itrans_recon_4x4;
extern FT_IQ_IT_RECON isvc_chroma_zcbf_iquant_itrans_recon_4x4;
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4;
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv;
extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4;
extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv;
/* A9 Declarations (functions carrying the _a9 suffix) */
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_a9;
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_a9;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_a9;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_a9;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_a9;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_a9;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_a9;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_a9;
extern FT_LUMA_16X16_RESI_TRANS_DCTRANS_QUANT isvc_luma_16x16_resi_trans_dctrans_quant_a9;
extern FT_CHROMA_8X8_RESI_TRANS_DCTRANS_QUANT isvc_chroma_8x8_resi_trans_dctrans_quant_a9;
extern FT_LUMA_16X16_IDCTRANS_IQUANT_ITRANS_RECON isvc_luma_16x16_idctrans_iquant_itrans_recon_a9;
extern FT_CHROMA_8X8_IDCTRANS_IQUANT_ITRANS_RECON isvc_chroma_8x8_idctrans_iquant_itrans_recon_a9;
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_a9;
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_a9;
extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4_a9;
extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv_a9;
/* Av8 Declarations (functions carrying the _av8 suffix) */
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_av8;
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_av8;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_av8;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_av8;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_av8;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_av8;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_av8;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_av8;
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_av8;
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_av8;
/* NEON Declarations: FT_RESI_TRANS_QUANT functions. Each block size has a */
/* plain variant and a '_with_residual_sub' variant */
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_neon;
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_with_residual_sub_neon;
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_neon;
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;
/* SSSE3 Declarations (functions carrying the _ssse3 suffix) */
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_ssse3;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_ssse3;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_ssse3;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_ssse3;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3;
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_ssse3;
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_ssse3;
/* SSE4.2 Declarations (functions carrying the _sse42 suffix) */
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_sse42;
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_with_res_pred_sse42;
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_sse42;
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_with_res_pred_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_4x4_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_4x4_with_res_acc_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_with_res_acc_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_dc_4x4_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_dc_4x4_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_dc_sse42;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_dc_with_res_acc_sse42;
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_sse42;
extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4_sse42;
extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv_sse42;
/* NEON Declarations: FT_IQ_IT_RECON functions. Each base function has a */
/* plain, a '_with_res_output' and a '_with_res_accumulate' variant, i.e. */
/* NUM_IQ_IT_RECON_VARIANTS entries per base function */
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_with_res_output_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
/* Returns the resi_trans_quant variant index for the given flag. The flag */
/* value is itself the index, so a 0/1 flag yields an index below */
/* NUM_RESI_TRANS_QUANT_VARIANTS */
static FORCEINLINE UWORD8 isvc_get_resi_trans_quant_variant_idx(UWORD8 u1_use_upsampled_res)
{
    const UWORD8 u1_variant_idx = u1_use_upsampled_res;

    return u1_variant_idx;
}
/* Returns the iq_it_recon variant index for the given flags: */
/*   0 - u1_is_intra = 0, u1_res_accumulate = 0 */
/*   1 - u1_is_intra = 0, u1_res_accumulate = 1 */
/*   2 - u1_is_intra = 1, u1_res_accumulate = 0 */
/* Intra together with residual accumulation is not a valid combination and */
/* is caught by the ASSERT */
static FORCEINLINE UWORD8 isvc_get_iq_it_recon_variant_idx(UWORD8 u1_is_intra,
                                                           UWORD8 u1_res_accumulate)
{
    ASSERT(!((1 == u1_is_intra) && (1 == u1_res_accumulate)));

    /* The intra flag is the high bit of the index, res_accumulate the low bit */
    return (u1_is_intra << 1) + u1_res_accumulate;
}
/* Returns the residue clipped to [-UINT8_MAX, UINT8_MAX]. When */
/* u1_res_accumulate is non-zero the residual prediction i2_res_pred is added */
/* to the inverse-transform output i2_it_out before clipping; otherwise */
/* i2_it_out alone is clipped */
static FORCEINLINE WORD16 isvc_get_residue(WORD16 i2_it_out, WORD16 i2_res_pred,
                                           UWORD8 u1_res_accumulate)
{
    /* Kept in 32 bits so that the sum of two WORD16 values is not truncated */
    const WORD32 i4_residue = u1_res_accumulate ? (i2_it_out + i2_res_pred) : i2_it_out;

    return CLIP3(-((WORD16) UINT8_MAX), ((WORD16) UINT8_MAX), i4_residue);
}
#endif

39
common/svccommon.cmake Normal file
View file

@ -0,0 +1,39 @@
# src files
# Architecture-independent SVC sources, appended to the common library sources
list(
APPEND
LIBAVC_COMMON_SRCS
"${AVC_ROOT}/common/svc/isvc_common_tables.c"
"${AVC_ROOT}/common/svc/isvc_cabac_tables.c"
"${AVC_ROOT}/common/svc/isvc_intra_resample.c"
"${AVC_ROOT}/common/svc/isvc_iquant_itrans_recon.c"
"${AVC_ROOT}/common/svc/isvc_mem_fns.c"
"${AVC_ROOT}/common/svc/isvc_resi_trans_quant.c")
include_directories(${AVC_ROOT}/common/svc)
# arm/x86 sources
# The branch is selected purely on CMAKE_SYSTEM_PROCESSOR
if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR
"${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch32")
# NEON intrinsics sources
# NOTE(review): these are .c files but are appended to LIBAVC_COMMON_ASMS,
# not LIBAVC_COMMON_SRCS - confirm this is intended
list(
APPEND
LIBAVC_COMMON_ASMS
"${AVC_ROOT}/common/arm/svc/isvc_intra_sampling_neon.c"
"${AVC_ROOT}/common/arm/svc/isvc_iquant_itrans_recon_neon.c"
"${AVC_ROOT}/common/arm/svc/isvc_mem_fns_neon.c"
"${AVC_ROOT}/common/arm/svc/isvc_resi_trans_quant_neon.c")
include_directories(${AVC_ROOT}/common/arm/svc)
else()
# NOTE(review): every processor other than aarch64/aarch32 takes this branch
# and gets the x86 SSSE3/SSE4.2 sources, including non-x86 targets - confirm
list(
APPEND
LIBAVC_COMMON_SRCS
"${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c"
"${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_sse42.c"
"${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_ssse3.c"
"${AVC_ROOT}/common/x86/svc/isvc_mem_fns_sse42.c"
"${AVC_ROOT}/common/x86/svc/isvc_mem_fns_ssse3.c"
"${AVC_ROOT}/common/x86/svc/isvc_padding_ssse3.c"
"${AVC_ROOT}/common/x86/svc/isvc_resi_trans_quant_sse42.c"
"${AVC_ROOT}/common/x86/svc/isvc_intra_resample_sse42.c")
include_directories(${AVC_ROOT}/common/x86/svc)
endif()

View file

@ -0,0 +1,658 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/*!
**************************************************************************
* \file isvc_intra_resample_sse42.c
*
* \brief
* Contains SSE4.2 routines for SVC intra resampling
*
* \author
* Ittiam
**************************************************************************
*/
#include <immintrin.h>
#include "ih264_typedefs.h"
#include "ih264_debug.h"
#include "isvc_intra_resample.h"
/* Loads 8 bytes from pu1_src and zero-extends them to eight 16-bit lanes */
static inline __m128i isvc_luma_dyadic_load_row(const UWORD8 *pu1_src)
{
    return _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i *) pu1_src));
}

/* Vertical 4-tap filter used for y_phase 12: (8 * s1 + 28 * s2) + (-3 * s3 - s0) */
/* on eight 16-bit lanes. s0..s3 are four vertically consecutive rows */
static inline __m128i isvc_luma_dyadic_vert_filt_phase_12(__m128i s0, __m128i s1, __m128i s2,
                                                          __m128i s3)
{
    const __m128i coeff_m3 = _mm_set1_epi16(-3);
    const __m128i coeff_28 = _mm_set1_epi16(28);
    /* Multiply by 8 => left shift by 3 */
    const __m128i inner = _mm_adds_epi16(_mm_slli_epi16(s1, 3), _mm_mullo_epi16(s2, coeff_28));
    const __m128i outer = _mm_subs_epi16(_mm_mullo_epi16(s3, coeff_m3), s0);

    return _mm_adds_epi16(inner, outer);
}

/* Vertical 4-tap filter used for y_phase 4: (-3 * s0 + 28 * s1) + (8 * s2 - s3) */
/* on eight 16-bit lanes. s0..s3 are four vertically consecutive rows */
static inline __m128i isvc_luma_dyadic_vert_filt_phase_4(__m128i s0, __m128i s1, __m128i s2,
                                                         __m128i s3)
{
    const __m128i coeff_m3 = _mm_set1_epi16(-3);
    const __m128i coeff_28 = _mm_set1_epi16(28);
    const __m128i lead = _mm_adds_epi16(_mm_mullo_epi16(s0, coeff_m3), _mm_mullo_epi16(s1, coeff_28));
    /* Multiply by 8 => left shift by 3 */
    const __m128i trail = _mm_subs_epi16(_mm_slli_epi16(s2, 3), s3);

    return _mm_adds_epi16(lead, trail);
}

/* Stores either all eight 16-bit lanes of 'row' (i4_store_8_lanes != 0) or */
/* only the lower four lanes (i4_store_8_lanes == 0) to pi2_dst */
static inline void isvc_luma_dyadic_store_row(WORD16 *pi2_dst, __m128i row,
                                              WORD32 i4_store_8_lanes)
{
    if(i4_store_8_lanes)
    {
        _mm_storeu_si128((__m128i *) pi2_dst, row);
    }
    else
    {
        _mm_storel_epi64((__m128i *) pi2_dst, row);
    }
}

/* Vertical interpolation of one group of columns. */
/* Reads 12 input rows (8 bytes each, i4_src_stride apart) starting at pu1_src */
/* and writes 16 rows of 16-bit intermediates (i4_dst_stride apart) starting at */
/* pi2_dst. Output row 0 uses y_phase 12, odd rows use y_phase 4 and the */
/* remaining even rows use y_phase 12 */
static inline void isvc_luma_dyadic_vert_interpol(const UWORD8 *pu1_src, WORD16 *pi2_dst,
                                                  WORD32 i4_src_stride, WORD32 i4_dst_stride,
                                                  WORD32 i4_store_8_lanes)
{
    WORD32 i4_y;
    __m128i s0, s1, s2, s3;

    /* y = 0, y_phase = 12 */
    s0 = isvc_luma_dyadic_load_row(pu1_src);
    s1 = isvc_luma_dyadic_load_row(pu1_src + i4_src_stride);
    s2 = isvc_luma_dyadic_load_row(pu1_src + (i4_src_stride << 1));
    s3 = isvc_luma_dyadic_load_row(pu1_src + (i4_src_stride << 1) + i4_src_stride);
    pu1_src += (i4_src_stride << 2);

    isvc_luma_dyadic_store_row(pi2_dst, isvc_luma_dyadic_vert_filt_phase_12(s0, s1, s2, s3),
                               i4_store_8_lanes);
    pi2_dst += i4_dst_stride;

    /* Each iteration pulls in one new input row and produces two output rows: */
    /* y (odd, y_phase 4) and y + 1 (even, y_phase 12) */
    for(i4_y = 1; i4_y < 15; i4_y += 2)
    {
        s0 = s1;
        s1 = s2;
        s2 = s3;
        s3 = isvc_luma_dyadic_load_row(pu1_src);

        isvc_luma_dyadic_store_row(pi2_dst, isvc_luma_dyadic_vert_filt_phase_4(s0, s1, s2, s3),
                                   i4_store_8_lanes);
        isvc_luma_dyadic_store_row(pi2_dst + i4_dst_stride,
                                   isvc_luma_dyadic_vert_filt_phase_12(s0, s1, s2, s3),
                                   i4_store_8_lanes);

        pi2_dst += (i4_dst_stride << 1);
        pu1_src += i4_src_stride;
    } /* End of loop over y */

    /* y = 15, y_phase = 4 */
    s0 = s1;
    s1 = s2;
    s2 = s3;
    s3 = isvc_luma_dyadic_load_row(pu1_src);

    isvc_luma_dyadic_store_row(pi2_dst, isvc_luma_dyadic_vert_filt_phase_4(s0, s1, s2, s3),
                               i4_store_8_lanes);
}

/* Horizontal interpolation producing 8 output samples from the 8 16-bit */
/* intermediates at pi2_src (x[0..7]; x[0..4] contribute). For j = 0..3: */
/*   out[2j]     = (-x[j]     +  8 * x[j + 1] + 28 * x[j + 2] - 3 * x[j + 3] + 512) >> 10 */
/*   out[2j + 1] = (-3 * x[j + 1] + 28 * x[j + 2] + 8 * x[j + 3] -   x[j + 4] + 512) >> 10 */
/* The results are returned as eight 16-bit lanes saturated to [0, 65535] */
static inline __m128i isvc_luma_dyadic_horz_interpol_8(const WORD16 *pi2_src)
{
    /* _mm_set_epi16 takes lanes from most to least significant; the pair in */
    /* each name is (low lane, high lane) of every 32-bit element */
    const __m128i coeff_m1_8 = _mm_set_epi16(8, -1, 8, -1, 8, -1, 8, -1);
    const __m128i coeff_28_m3 = _mm_set_epi16(-3, 28, -3, 28, -3, 28, -3, 28);
    const __m128i coeff_m3_28 = _mm_set_epi16(28, -3, 28, -3, 28, -3, 28, -3);
    const __m128i coeff_8_m1 = _mm_set_epi16(-1, 8, -1, 8, -1, 8, -1, 8);
    const __m128i round_512 = _mm_set1_epi32(512);

    /* shift_k holds the row shifted left by k samples */
    const __m128i shift_0 = _mm_loadu_si128((const __m128i *) pi2_src);
    const __m128i shift_1 = _mm_srli_si128(shift_0, 2);
    const __m128i shift_2 = _mm_srli_si128(shift_0, 4);
    const __m128i shift_3 = _mm_srli_si128(shift_0, 6);
    const __m128i shift_4 = _mm_srli_si128(shift_0, 8);

    /* pair_k element j holds (x[j + k], x[j + k + 1]) for j = 0..3 */
    const __m128i pair_0 = _mm_unpacklo_epi16(shift_0, shift_1);
    const __m128i pair_1 = _mm_unpacklo_epi16(shift_1, shift_2);
    const __m128i pair_2 = _mm_unpacklo_epi16(shift_2, shift_3);
    const __m128i pair_3 = _mm_unpacklo_epi16(shift_3, shift_4);

    /* Four even and four odd output samples as 32-bit sums */
    const __m128i even = _mm_add_epi32(_mm_madd_epi16(pair_0, coeff_m1_8),
                                       _mm_madd_epi16(pair_2, coeff_28_m3));
    const __m128i odd = _mm_add_epi32(_mm_madd_epi16(pair_1, coeff_m3_28),
                                      _mm_madd_epi16(pair_3, coeff_8_m1));

    /* Interleave even/odd back into output order, then round and scale */
    __m128i lo = _mm_unpacklo_epi32(even, odd);
    __m128i hi = _mm_unpackhi_epi32(even, odd);

    lo = _mm_srai_epi32(_mm_add_epi32(lo, round_512), 10);
    hi = _mm_srai_epi32(_mm_add_epi32(hi, round_512), 10);

    return _mm_packus_epi32(lo, hi);
}

/**
*******************************************************************************
*
* @brief
*  Dyadic luma interpolation (SSE4.2): produces a 16x16 block of 8-bit samples
*
* @par Description:
*  Stage 1 (vertical): 12 input rows, DYADIC_REF_W_Y bytes apart, are filtered
*  into 16 rows of 12 16-bit intermediates stored with a stride of 12 in
*  pi2_tmp_filt_buf. Columns 0-7 and columns 8-11 are processed separately.
*  Stage 2 (horizontal): each intermediate row is filtered into 16 samples,
*  rounded with (x + 512) >> 10, saturated to 8 bits and written to the output.
*
* @param[in] pu1_inp_buf
*  Input samples; 16 bytes are loaded from each of the 12 rows
*
* @param[out] pi2_tmp_filt_buf
*  Scratch buffer for the intermediates; 16 * 12 WORD16 values are written
*
* @param[out] pu1_out_buf
*  Output samples; 16 bytes are written to each of 16 rows
*
* @param[in] i4_out_stride
*  Distance in bytes between consecutive output rows
*
* @returns
*  None
*
*******************************************************************************
*/
void isvc_interpolate_base_luma_dyadic_sse42(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
                                             UWORD8 *pu1_out_buf, WORD32 i4_out_stride)
{
    const WORD32 i4_filt_stride = 12;
    const WORD32 i4_src_stride = DYADIC_REF_W_Y;
    const WORD16 *pi2_tmp = pi2_tmp_filt_buf;
    UWORD8 *pu1_out = pu1_out_buf;
    WORD32 i4_y;

    /* Vertical interpolation */
    /* First 64 bits of input: columns 0-7, all eight results are stored */
    isvc_luma_dyadic_vert_interpol(pu1_inp_buf, pi2_tmp_filt_buf, i4_src_stride, i4_filt_stride,
                                   1);
    /* Remaining 32 bits of input: columns 8-11, only four results are stored */
    isvc_luma_dyadic_vert_interpol(pu1_inp_buf + 8, pi2_tmp_filt_buf + 8, i4_src_stride,
                                   i4_filt_stride, 0);

    /* Horizontal interpolation */
    for(i4_y = 0; i4_y < 16; i4_y++)
    {
        /* Outputs 0-7 come from intermediates 0-7, outputs 8-15 from 4-11 */
        const __m128i out_0_7 = isvc_luma_dyadic_horz_interpol_8(pi2_tmp);
        const __m128i out_8_15 = isvc_luma_dyadic_horz_interpol_8(pi2_tmp + 4);

        _mm_storeu_si128((__m128i *) pu1_out, _mm_packus_epi16(out_0_7, out_8_15));

        pi2_tmp += i4_filt_stride;
        pu1_out += i4_out_stride;
    } /* End of loop over y */
}
/**
*******************************************************************************
*
* @brief
* Vertical pass of the 2-tap dyadic chroma interpolation (SSE4.2 variant)
*
* @par Description:
* Reads six rows of 8-bit samples (row pitch DYADIC_REF_W_C) and blends each
* pair of vertically adjacent rows with a two-tap weight pair. Eight rows of
* un-normalised 16-bit sums are produced and written to the temporary buffer
* with a pitch of 6 WORD16, i.e. 48 WORD16 in total.
* Even output rows (0, 2, 4, 6) use the weights (16 - i4_phase_0, i4_phase_0)
* on the input row pairs (0,1), (1,2), (2,3), (3,4); odd output rows
* (1, 3, 5, 7) use (16 - i4_phase_1, i4_phase_1) on the pairs (1,2), (2,3),
* (3,4), (4,5).
*
* @param[in] pu1_inp_buf
* 8-bit reference samples. 16 bytes are loaded at each of the six row
* offsets, although only the low 8 bytes of every row enter the result
*
* @param[out] pi2_tmp_filt_buf
* receives the 16-bit vertical sums; words 0..47 are written
*
* @param[in] i4_phase_0
* weight of the second row of a pair for even output rows
*
* @param[in] i4_phase_1
* weight of the second row of a pair for odd output rows
*
* @returns none
*
* @remarks
* Both phases are narrowed to WORD8 before use. NOTE(review): the arithmetic
* only stays clear of the 16-bit saturation of _mm_maddubs_epi16 if the phases
* are small (e.g. 0..16) - confirm the range against the callers.
*
*******************************************************************************
*/
void isvc_vert_interpol_chroma_dyadic_sse42(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
WORD32 i4_phase_0, WORD32 i4_phase_1)
{
WORD8 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
WORD32 i4_filt_stride, i4_src_stride;
UWORD8 *pu1_inp;
WORD16 *pi2_tmp;
__m128i i4_samp_16x8b_0, i4_samp_16x8b_1, i4_samp_16x8b_2, i4_samp_16x8b_3, i4_samp_16x8b_4,
i4_samp_16x8b_5;
__m128i i4_res_8x16b_r0, i4_res_8x16b_r1, i4_res_8x16b_r2, i4_res_8x16b_r3, i4_res_8x16b_r4,
i4_res_8x16b_r5, i4_res_8x16b_r6, i4_res_8x16b_r7;
__m128i i4_res_8x16b_r7_temp;
__m128i i4_c0_c1_16x8b, i4_c2_c3_16x8b;
/* Two-tap weights; each pair sums to 16 */
i4_coeff_0 = (WORD8) (16 - i4_phase_0);
i4_coeff_1 = (WORD8) (i4_phase_0);
i4_coeff_2 = (WORD8) (16 - i4_phase_1);
i4_coeff_3 = (WORD8) (i4_phase_1);
/* Byte pattern c0 c1 c0 c1 ... (c0 in the even byte) to match the row */
/* interleave produced by _mm_unpacklo_epi8 below */
i4_c0_c1_16x8b =
_mm_set_epi8(i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0);
/* Byte pattern c2 c3 c2 c3 ... for the second phase */
i4_c2_c3_16x8b =
_mm_set_epi8(i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2);
/* Initializing pointers */
pu1_inp = pu1_inp_buf;
pi2_tmp = pi2_tmp_filt_buf;
/* Output pitch in WORD16 units: rows are only 6 words apart although */
/* every full store below writes 8 words */
i4_filt_stride = 6;
i4_src_stride = DYADIC_REF_W_C;
/* Load input rows 0..5 (16 bytes each) */
i4_samp_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_inp));
i4_samp_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_inp + i4_src_stride));
i4_samp_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 1)));
i4_samp_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride));
i4_samp_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 2)));
i4_samp_16x8b_5 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 2) + i4_src_stride));
/* From here on i4_samp_16x8b_k holds the low 8 bytes of rows k and k+1 */
/* interleaved byte by byte. _mm_maddubs_epi16 then yields, per column, */
/* row_k * first_weight + row_k+1 * second_weight as a 16-bit value. */
/* Output row 0: rows (0,1), phase 0 */
i4_samp_16x8b_0 = _mm_unpacklo_epi8(i4_samp_16x8b_0, i4_samp_16x8b_1);
i4_res_8x16b_r0 = _mm_maddubs_epi16(i4_samp_16x8b_0, i4_c0_c1_16x8b);
/* Each 8-word store spills 2 words into the next output row; the store */
/* of that next row overwrites them, so the store order must be kept */
_mm_storeu_si128((__m128i *) (pi2_tmp), i4_res_8x16b_r0);
/* Output rows 1 and 2: rows (1,2), phase 1 then phase 0 */
i4_samp_16x8b_1 = _mm_unpacklo_epi8(i4_samp_16x8b_1, i4_samp_16x8b_2);
i4_res_8x16b_r1 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c2_c3_16x8b);
_mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r1);
i4_res_8x16b_r2 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c0_c1_16x8b);
_mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1)), i4_res_8x16b_r2);
/* Output rows 3 and 4: rows (2,3), phase 1 then phase 0 */
i4_samp_16x8b_2 = _mm_unpacklo_epi8(i4_samp_16x8b_2, i4_samp_16x8b_3);
i4_res_8x16b_r3 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c2_c3_16x8b);
_mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1) + i4_filt_stride),
i4_res_8x16b_r3);
i4_res_8x16b_r4 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c0_c1_16x8b);
_mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2)), i4_res_8x16b_r4);
/* Output rows 5 and 6: rows (3,4), phase 1 then phase 0 */
i4_samp_16x8b_3 = _mm_unpacklo_epi8(i4_samp_16x8b_3, i4_samp_16x8b_4);
i4_res_8x16b_r5 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c2_c3_16x8b);
_mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + i4_filt_stride),
i4_res_8x16b_r5);
i4_res_8x16b_r6 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c0_c1_16x8b);
/* Row 6 starts at word 36: only its low 4 words are stored here */
_mm_storel_epi64((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1)),
i4_res_8x16b_r6);
/* 78 = 0b01001110 swaps the 64-bit halves: words 4,5 of row 6 move to */
/* word positions 0,1 */
i4_res_8x16b_r6 = _mm_shuffle_epi32(i4_res_8x16b_r6, 78);
/* Output row 7: rows (4,5), phase 1 */
i4_samp_16x8b_4 = _mm_unpacklo_epi8(i4_samp_16x8b_4, i4_samp_16x8b_5);
i4_res_8x16b_r7 = _mm_maddubs_epi16(i4_samp_16x8b_4, i4_c2_c3_16x8b);
/* 147 = 0b10010011 rotates the dwords up by one: words 0..5 of row 7 */
/* move to word positions 2..7 */
i4_res_8x16b_r7 = _mm_shuffle_epi32(i4_res_8x16b_r7, 147);
/* 252 = 0b11111100: words 0,1 from row 6, words 2..7 from row 7 */
i4_res_8x16b_r7_temp = _mm_blend_epi16(i4_res_8x16b_r6, i4_res_8x16b_r7, 252);
/* Final store covers words 40..47: the last two words of row 6 followed */
/* by the six words of row 7, so nothing is written beyond word 47 */
_mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1) + 4),
i4_res_8x16b_r7_temp);
}
/**
*******************************************************************************
*
* @brief
*  Horizontal pass of the 2-tap dyadic chroma interpolation (SSE4.2 variant)
*
* @par Description:
*  Consumes eight rows of 16-bit intermediate sums laid out with a pitch of
*  6 WORD16 and produces eight rows of eight samples each. Even output columns
*  use the weights (16 - i4_phase_0, i4_phase_0), odd output columns use
*  (16 - i4_phase_1, i4_phase_1). Every sum is rounded with +128 and shifted
*  right by 8. The low byte of each 16-bit result is written to the even bytes
*  of a 16-byte destination row; the odd destination bytes keep their previous
*  contents (presumably the other plane of an interleaved chroma buffer -
*  confirm against the caller).
*
* @param[in] pi2_tmp_filt_buf
*  intermediate sums; 8 WORD16 are loaded at word offsets 0, 6, ..., 42
*
* @param[in,out] pu1_out_buf
*  destination; 16 bytes are read and rewritten on each of eight rows
*
* @param[in] i4_out_stride
*  destination row pitch in bytes
*
* @param[in] i4_phase_0
*  weight of the right-hand neighbour for even output columns
*
* @param[in] i4_phase_1
*  weight of the right-hand neighbour for odd output columns
*
* @returns none
*
* @remarks
*  Only words 0..5 of each loaded row influence the result, but the load for
*  the last row touches words 42..49. NOTE(review): confirm that the
*  intermediate buffer is at least 50 WORD16 long.
*
*******************************************************************************
*/
void isvc_horz_interpol_chroma_dyadic_sse42(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf,
                                            WORD32 i4_out_stride, WORD32 i4_phase_0,
                                            WORD32 i4_phase_1)
{
    WORD32 i4_row;
    __m128i samp_8x16b, samp_sh1_8x16b, samp_sh2_8x16b;
    __m128i even_4x32b, odd_4x32b;
    __m128i res_8x16b, dst_16x8b;
    __m128i merged_16x8b[8];

    const WORD32 i4_tap_0 = 16 - i4_phase_0;
    const WORD32 i4_tap_1 = i4_phase_0;
    const WORD32 i4_tap_2 = 16 - i4_phase_1;
    const WORD32 i4_tap_3 = i4_phase_1;
    const WORD32 i4_norm_shift = 8;

    /* (tap_0, tap_1) repeated, tap_0 in the even 16-bit lane */
    const __m128i taps_even_8x16b = _mm_set_epi16(i4_tap_1, i4_tap_0, i4_tap_1, i4_tap_0,
                                                  i4_tap_1, i4_tap_0, i4_tap_1, i4_tap_0);
    /* (tap_2, tap_3) repeated, tap_2 in the even 16-bit lane */
    const __m128i taps_odd_8x16b = _mm_set_epi16(i4_tap_3, i4_tap_2, i4_tap_3, i4_tap_2,
                                                 i4_tap_3, i4_tap_2, i4_tap_3, i4_tap_2);
    const __m128i round_4x32b = _mm_set1_epi32(128);
    /* Selects the odd bytes of the destination, which must be preserved */
    const __m128i keep_dst_mask = _mm_set1_epi16(0xFF00);
    /* Selects the low byte of every 16-bit filter result */
    const __m128i keep_res_mask = _mm_set1_epi16(0x00FF);

    /* Pass 1: filter every row and merge it with the current destination */
    /* contents. Nothing is written yet, so all reads see the original data. */
    for(i4_row = 0; i4_row < 8; i4_row++)
    {
        /* a0 a1 a2 a3 a4 a5 a6 a7 */
        samp_8x16b = _mm_loadu_si128((__m128i *) (pi2_tmp_filt_buf + 6 * i4_row));
        /* a1 a2 ... and a2 a3 ... */
        samp_sh1_8x16b = _mm_srli_si128(samp_8x16b, 2);
        samp_sh2_8x16b = _mm_srli_si128(samp_8x16b, 4);

        /* a0c0+a1c1  a1c0+a2c1  a2c0+a3c1  a3c0+a4c1 */
        even_4x32b =
            _mm_madd_epi16(_mm_unpacklo_epi16(samp_8x16b, samp_sh1_8x16b), taps_even_8x16b);
        /* a1c2+a2c3  a2c2+a3c3  a3c2+a4c3  a4c2+a5c3 */
        odd_4x32b =
            _mm_madd_epi16(_mm_unpacklo_epi16(samp_sh1_8x16b, samp_sh2_8x16b), taps_odd_8x16b);

        /* Round and normalise */
        even_4x32b = _mm_srai_epi32(_mm_add_epi32(even_4x32b, round_4x32b), i4_norm_shift);
        odd_4x32b = _mm_srai_epi32(_mm_add_epi32(odd_4x32b, round_4x32b), i4_norm_shift);

        /* Narrow to 16 bits and interleave even/odd columns: e0 o0 e1 o1 ... */
        even_4x32b = _mm_packs_epi32(even_4x32b, even_4x32b);
        odd_4x32b = _mm_packs_epi32(odd_4x32b, odd_4x32b);
        res_8x16b = _mm_unpacklo_epi16(even_4x32b, odd_4x32b);
        res_8x16b = _mm_and_si128(res_8x16b, keep_res_mask);

        /* The two operands have disjoint non-zero bytes, so the byte-wise */
        /* add simply combines them */
        dst_16x8b = _mm_loadu_si128((__m128i *) (pu1_out_buf + i4_row * i4_out_stride));
        dst_16x8b = _mm_and_si128(dst_16x8b, keep_dst_mask);
        merged_16x8b[i4_row] = _mm_add_epi8(res_8x16b, dst_16x8b);
    }

    /* Pass 2: write the merged rows back */
    for(i4_row = 0; i4_row < 8; i4_row++)
    {
        _mm_storeu_si128((__m128i *) (pu1_out_buf + i4_row * i4_out_stride),
                         merged_16x8b[i4_row]);
    }
}

View file

@ -0,0 +1,548 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_iquant_itrans_recon_dc_ssse3.c
*
* @brief
* Contains function definitions for inverse quantization, inverse
* transform and reconstruction
*
* @author
* Mohit [100664]
*
* @par List of Functions:
* - isvc_iquant_itrans_recon_4x4_dc_ssse3()
* - isvc_iquant_itrans_recon_8x8_dc_ssse3()
* - isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3()
*
* @remarks
* None
*
*******************************************************************************
*/
#include <immintrin.h>
#include <string.h>
#include "ih264_typedefs.h"
#include "ih264_debug.h"
#include "ih264_defs.h"
#include "ih264_trans_macros.h"
#include "ih264_macros.h"
#include "ih264_platform_macros.h"
#include "ih264_trans_data.h"
#include "ih264_size_defs.h"
#include "isvc_structs.h"
#include "isvc_trans_quant_itrans_iquant.h"
/**
********************************************************************************
*
* @brief
*  Reconstructs a 4x4 block from a quantized residue that has a DC coefficient
*  only, i.e. only element (0,0) of the input 4x4 block is non-zero. For the
*  complete function, refer isvc_iquant_itrans_recon_ssse3.c
*
* @par Description:
*  The DC coefficient is inverse quantized (or, when i4_iq_start_idx is
*  non-zero, replaced by pi2_dc_src[0]), scaled by (x + 32) >> 6 and the
*  resulting constant is added to all 16 prediction samples. The sums are
*  clipped to 8 bits and written to the reconstruction buffer.
*
* @param[in] ps_src
*  quantized coefficients (WORD16); only element 0 is read
*
* @param[in] ps_pred
*  prediction block (UWORD8) and its stride; 4 bytes are read on each of 4 rows
*
* @param[in] ps_res_pred
*  not used by this function yet (see remarks)
*
* @param[in] ps_res
*  not used by this function yet (see remarks)
*
* @param[out] ps_rec
*  reconstruction block (UWORD8) and its stride; 4 bytes are written on each
*  of 4 rows
*
* @param[in] ps_iq_it_res_rec_constants
*  supplies pu2_iscal_mat, pu2_weigh_mat (element 0 of each is used) and
*  u4_qp_div_6
*
* @param[in] pi2_tmp
*  not used
*
* @param[in] pi2_dc_src
*  DC value used in place of the dequantized one when i4_iq_start_idx != 0;
*  not read otherwise
*
* @param[in] i4_iq_start_idx
*  non-zero selects pi2_dc_src[0] as the DC value
*
* @param[in] u1_res_accumulate
*  not used by this function yet (see remarks)
*
* @returns none
*
* @remarks
*  Residue accumulation is not implemented: the function executes ASSERT(0)
*  before doing any work, so it is not usable in builds where ASSERT is active.
*
*******************************************************************************
*/
void isvc_iquant_itrans_recon_4x4_dc_ssse3(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                                           buffer_container_t *ps_res_pred,
                                           buffer_container_t *ps_res, buffer_container_t *ps_rec,
                                           iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
                                           WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
                                           WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
{
    WORD16 *pi2_src = ps_src->pv_data;
    WORD16 *pi2_res = ps_res->pv_data;
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
    UWORD8 *pu1_pred = ps_pred->pv_data;
    UWORD8 *pu1_out = ps_rec->pv_data;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_res_stride = ps_res->i4_data_stride;
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
    WORD32 q0 = pi2_src[0];
    WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
    WORD32 ai4_pred_rows[4];
    WORD32 i4_rec_row;
    WORD32 i;
    __m128i pred_16x8b, rec_r01_8x16b, rec_r23_8x16b, rec_16x8b;
    __m128i value_add;
    const __m128i zero_8x16b = _mm_setzero_si128();

    UNUSED(pi2_tmp);
    UNUSED(u1_res_accumulate);
    UNUSED(i4_src_stride);
    UNUSED(i4_res_stride);
    UNUSED(i4_res_pred_stride);
    UNUSED(pi2_res);
    UNUSED(pi2_res_pred);

    /* Implement residue accumulation */
    ASSERT(0);

    INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);

    if(i4_iq_start_idx != 0) q0 = pi2_dc_src[0]; /* Restoring dc value for intra case */

    /* With a DC-only input every residual sample equals this constant */
    i_macro = ((q0 + 32) >> 6);
    value_add = _mm_set1_epi16(i_macro);

    /* Fetch exactly the 4 bytes that belong to each prediction row. memcpy */
    /* keeps the access free of alignment and aliasing assumptions and avoids */
    /* reading past the 4-sample row. */
    for(i = 0; i < 4; i++)
    {
        memcpy(&ai4_pred_rows[i], pu1_pred + i * i4_pred_stride, sizeof(ai4_pred_rows[i]));
    }

    /* p00..p03 p10..p13 p20..p23 p30..p33 -- all 8 bits */
    pred_16x8b =
        _mm_set_epi32(ai4_pred_rows[3], ai4_pred_rows[2], ai4_pred_rows[1], ai4_pred_rows[0]);

    /* Widen to 16 bits (rows 0,1 and rows 2,3) and add the DC residual */
    rec_r01_8x16b = _mm_add_epi16(value_add, _mm_unpacklo_epi8(pred_16x8b, zero_8x16b));
    rec_r23_8x16b = _mm_add_epi16(value_add, _mm_unpackhi_epi8(pred_16x8b, zero_8x16b));

    /* Unsigned saturation clips to [0, 255]; negative sums become 0, so no */
    /* separate sign mask is required */
    rec_16x8b = _mm_packus_epi16(rec_r01_8x16b, rec_r23_8x16b);

    /* Emit one 4-byte row at a time from the low dword of the register */
    for(i = 0; i < 4; i++)
    {
        i4_rec_row = _mm_cvtsi128_si32(rec_16x8b);
        memcpy(pu1_out + i * i4_out_stride, &i4_rec_row, sizeof(i4_rec_row));
        rec_16x8b = _mm_srli_si128(rec_16x8b, 4);
    }
}
/**
*******************************************************************************
*
* @brief
*  Reconstructs an 8x8 block from a quantized residue that has a DC
*  coefficient only, i.e. only element (0,0) of the input 8x8 block is
*  non-zero. For the complete function, refer isvc_iquant_itrans_recon_ssse3.c
*
* @par Description:
*  The DC coefficient is inverse quantized, scaled by (x + 32) >> 6 and the
*  resulting constant is added to all 64 prediction samples. The sums are
*  clipped to 8 bits and written to the reconstruction buffer.
*
* @param[in] ps_src
*  quantized coefficients (WORD16); only element 0 is read
*
* @param[in] ps_pred
*  prediction block (UWORD8) and its stride; 8 bytes are read on each of 8 rows
*
* @param[in] ps_res_pred
*  not used by this function yet (see remarks)
*
* @param[in] ps_res
*  not used by this function yet (see remarks)
*
* @param[out] ps_rec
*  reconstruction block (UWORD8) and its stride; 8 bytes are written on each
*  of 8 rows
*
* @param[in] ps_iq_it_res_rec_constants
*  supplies pu2_iscal_mat, pu2_weigh_mat (element 0 of each is used) and
*  u4_qp_div_6
*
* @param[in] pi2_tmp
*  not used
*
* @param[in] pi2_dc_src
*  not used
*
* @param[in] i4_iq_start_idx
*  not used
*
* @param[in] u1_res_accumulate
*  not used by this function yet (see remarks)
*
* @returns Void
*
* @remarks
*  Residue accumulation is not implemented: the function executes ASSERT(0)
*  before doing any work, so it is not usable in builds where ASSERT is active.
*
*******************************************************************************
*/
void isvc_iquant_itrans_recon_8x8_dc_ssse3(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                                           buffer_container_t *ps_res_pred,
                                           buffer_container_t *ps_res, buffer_container_t *ps_rec,
                                           iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
                                           WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
                                           WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
{
    WORD16 *pi2_src = ps_src->pv_data;
    WORD16 *pi2_res = ps_res->pv_data;
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
    UWORD8 *pu1_pred = ps_pred->pv_data;
    UWORD8 *pu1_out = ps_rec->pv_data;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_res_stride = ps_res->i4_data_stride;
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
    WORD32 q0 = pi2_src[0];
    WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 6) ? 1 << (5 - u4_qp_div_6) : 0;
    WORD32 i4_row;
    __m128i zero_8x16b = _mm_setzero_si128();
    __m128i pred_8x16b, sum_8x16b, positive_mask;
    __m128i value_add;
    __m128i rec_rows_16x8b[8];

    UNUSED(pi2_tmp);
    UNUSED(pi2_dc_src);
    UNUSED(u1_res_accumulate);
    UNUSED(i4_src_stride);
    UNUSED(i4_res_stride);
    UNUSED(i4_res_pred_stride);
    UNUSED(pi2_res);
    UNUSED(pi2_res_pred);
    UNUSED(i4_iq_start_idx);

    /* Implement residue accumulation */
    ASSERT(0);

    INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 6);

    /* With a DC-only input every residual sample equals this constant */
    i_macro = ((q0 + 32) >> 6);
    value_add = _mm_set1_epi16(i_macro);

    /* Pass 1: build all eight reconstructed rows before anything is stored */
    for(i4_row = 0; i4_row < 8; i4_row++)
    {
        /* p0 p1 p2 p3 p4 p5 p6 p7 -- widened from 8 to 16 bits */
        pred_8x16b = _mm_loadl_epi64((__m128i *) (pu1_pred + i4_row * i4_pred_stride));
        pred_8x16b = _mm_unpacklo_epi8(pred_8x16b, zero_8x16b);

        sum_8x16b = _mm_add_epi16(value_add, pred_8x16b);

        /* Clip to 8 bits: zero out non-positive lanes, then saturate on pack */
        positive_mask = _mm_cmpgt_epi16(sum_8x16b, zero_8x16b);
        sum_8x16b = _mm_and_si128(sum_8x16b, positive_mask);
        rec_rows_16x8b[i4_row] = _mm_packus_epi16(sum_8x16b, zero_8x16b);
    }

    /* Pass 2: write the low 8 bytes of every row to the recon buffer */
    for(i4_row = 0; i4_row < 8; i4_row++)
    {
        _mm_storel_epi64((__m128i *) (pu1_out + i4_row * i4_out_stride),
                         rec_rows_16x8b[i4_row]);
    }
}
/*
********************************************************************************
*
* @brief This function reconstructs a 4x4 sub block from quantized chroma
* residue and prediction buffer
*
* @par Description:
* The quantized residue is first inverse quantized, then inverse transformed.
* This inverse transformed content is added to the prediction buffer to recon-
* struct the end output
*
* @param[in] pi2_src
* quantized 4x4 block
*
* @param[in] pu1_pred
* prediction 4x4 block
*
* @param[out] pu1_out
* reconstructed 4x4 block
*
* @param[in] src_strd
* quantization buffer stride
*
* @param[in] i4_pred_stride,
* Prediction buffer stride
*
* @param[in] i4_out_stride
* recon buffer Stride
*
* @param[in] pu2_scaling_list
* pointer to scaling list
*
* @param[in] pu2_norm_adjust
* pointer to inverse scale matrix
*
* @param[in] u4_qp_div_6
* Floor (qp/6)
*
* @param[in] pi4_tmp
* temporary buffer of size 1*16
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3(
buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
buffer_container_t *ps_res, buffer_container_t *ps_rec,
iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
{
WORD16 *pi2_src = ps_src->pv_data;
WORD16 *pi2_res = ps_res->pv_data;
WORD16 *pi2_res_pred = ps_res_pred->pv_data;
UWORD8 *pu1_pred = ps_pred->pv_data;
UWORD8 *pu1_out = ps_rec->pv_data;
WORD32 i4_src_stride = ps_src->i4_data_stride;
WORD32 i4_res_stride = ps_res->i4_data_stride;
WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
WORD32 i4_pred_stride = ps_pred->i4_data_stride;
WORD32 i4_out_stride = ps_rec->i4_data_stride;
const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
WORD16 q0 = pi2_dc_src[0]; // DC value won't be dequantized for chroma
// inverse transform
WORD16 i_macro = ((q0 + 32) >> 6);
__m128i pred_r0, pred_r1, pred_r2, pred_r3, sign_reg;
__m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
__m128i chroma_mask = _mm_set1_epi16(0xFF);
__m128i value_add = _mm_set1_epi16(i_macro);
__m128i out_r0, out_r1, out_r2, out_r3;
UNUSED(pi2_src);
UNUSED(pu2_iscal_mat);
UNUSED(pu2_weigh_mat);
UNUSED(u4_qp_div_6);
UNUSED(pi2_tmp);
UNUSED(u1_res_accumulate);
UNUSED(i4_src_stride);
UNUSED(i4_res_stride);
UNUSED(i4_res_pred_stride);
UNUSED(pi2_res);
UNUSED(pi2_res_pred);
UNUSED(i4_iq_start_idx);
/* Implement residue accumulation */
ASSERT(0);
// Load pred buffer
pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); // p00 p01 p02 p03 0 0 0 0 0
// 0 0 0 -- all 8 bits
pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); // p10 p11 p12 p13 0 0 0 0
// 0 0 0 0 -- all 8 bits
pred_r2 =
_mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); // p20 p21 p22 p23 0 0 0 0
// 0 0 0 0 -- all 8 bits
pred_r3 =
_mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); // p30 p31 p32 p33 0 0 0 0
// 0 0 0 0 -- all 8 bits
pred_r0 = _mm_and_si128(pred_r0, chroma_mask);
pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
pred_r3 = _mm_and_si128(pred_r3, chroma_mask);
pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); // p00 p01 p02 p03 p10 p11 p12 p13
pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); // p20 p21 p22p p23 p30 p31 p32 p33
pred_r0 = _mm_add_epi16(value_add, pred_r0);
pred_r2 = _mm_add_epi16(value_add, pred_r2);
/*------------------------------------------------------------------*/
// Clipping the results to 8 bits
sign_reg = _mm_cmpgt_epi16(pred_r0, zero_8x16b); // sign check
pred_r0 = _mm_and_si128(pred_r0, sign_reg);
sign_reg = _mm_cmpgt_epi16(pred_r2, zero_8x16b);
pred_r2 = _mm_and_si128(pred_r2, sign_reg);
pred_r0 = _mm_packus_epi16(pred_r0, pred_r2);
pred_r1 = _mm_srli_si128(pred_r0, 4);
pred_r2 = _mm_srli_si128(pred_r1, 4);
pred_r3 = _mm_srli_si128(pred_r2, 4);
pred_r0 = _mm_unpacklo_epi8(pred_r0, zero_8x16b); // p00 p01 p02 p03 -- all 16 bits
pred_r1 = _mm_unpacklo_epi8(pred_r1, zero_8x16b); // p10 p11 p12 p13 -- all 16 bits
pred_r2 = _mm_unpacklo_epi8(pred_r2, zero_8x16b); // p20 p21 p22 p23 -- all 16 bits
pred_r3 = _mm_unpacklo_epi8(pred_r3, zero_8x16b); // p30 p31 p32 p33 -- all 16 bits
chroma_mask = _mm_set1_epi16(0xFF00);
out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0]));
out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_out_stride]));
out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]));
out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]));
out_r0 = _mm_and_si128(out_r0, chroma_mask);
out_r1 = _mm_and_si128(out_r1, chroma_mask);
out_r2 = _mm_and_si128(out_r2, chroma_mask);
out_r3 = _mm_and_si128(out_r3, chroma_mask);
out_r0 = _mm_add_epi8(out_r0, pred_r0);
out_r1 = _mm_add_epi8(out_r1, pred_r1);
out_r2 = _mm_add_epi8(out_r2, pred_r2);
out_r3 = _mm_add_epi8(out_r3, pred_r3);
_mm_storel_epi64((__m128i *) (&pu1_out[0]), out_r0);
_mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), out_r1);
_mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2);
_mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,157 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
* *******************************************************************************
* * @file
* isvc_mem_fns_sse42.c
*
* @brief
* SSE4.2 variants of
* functions used for memory operations
*
* *******************************************************************************
*/
#include <string.h>
#include <immintrin.h>
#include "ih264_typedefs.h"
#include "isvc_mem_fns.h"
/**
*******************************************************************************
*
* @brief
*  Fills a 2d block with a constant byte value
*
* @par Description:
*  4x4 blocks, 8x8 blocks and blocks whose width and height are both multiples
*  of 16 are filled with fixed width stores. Every other block size is filled
*  one row at a time with memset()
*
* @param[out] pu1_dst
*  pointer to the top left byte of the block; no alignment is required
*
* @param[in] i4_dst_stride
*  distance in bytes between the starts of two consecutive rows
*
* @param[in] u1_val
*  byte value written to every position of the block
*
* @param[in] i4_blk_wd
*  block width in bytes
*
* @param[in] i4_blk_ht
*  block height in rows
*
* @returns none
*
*******************************************************************************
*/
void isvc_memset_2d_sse42(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
                          WORD32 i4_blk_ht)
{
    WORD32 i, j;
    /* u1_val replicated into all 16 byte lanes; shared by every SIMD path */
    const __m128i val_16x8b = _mm_set1_epi8(u1_val);

    if((i4_blk_wd == 4) && (i4_blk_ht == 4))
    {
        /* The 4 byte pattern is written with memcpy() rather than through a */
        /* (WORD32 *) cast: pu1_dst is only known to be byte aligned, and a  */
        /* cast store would assert 4 byte alignment to the compiler          */
        const WORD32 i4_val = _mm_cvtsi128_si32(val_16x8b);

        for(i = 0; i < 4; i++)
        {
            memcpy(pu1_dst + i * i4_dst_stride, &i4_val, sizeof(i4_val));
        }
    }
    else if((i4_blk_wd == 8) && (i4_blk_ht == 8))
    {
        /* one 8 byte store per row */
        for(i = 0; i < 8; i++)
        {
            _mm_storel_epi64((__m128i *) (pu1_dst + i * i4_dst_stride), val_16x8b);
        }
    }
    else if((i4_blk_wd % 16 == 0) && (i4_blk_ht % 16 == 0))
    {
        /* Whole rows of unaligned 16 byte stores. Every byte receives the  */
        /* same value, so the order in which the block is walked is free    */
        for(i = 0; i < i4_blk_ht; i++)
        {
            for(j = 0; j < i4_blk_wd; j += 16)
            {
                _mm_storeu_si128((__m128i *) (pu1_dst + j), val_16x8b);
            }

            pu1_dst += i4_dst_stride;
        }
    }
    else
    {
        /* generic sizes: plain row by row fill */
        for(i = 0; i < i4_blk_ht; i++)
        {
            memset(pu1_dst, u1_val, i4_blk_wd);
            pu1_dst += i4_dst_stride;
        }
    }
}

View file

@ -0,0 +1,435 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvc_mem_fns_atom_intr.c
*
* @brief
* Functions used for memory operations
*
* @author
* Ittiam
*
* @par List of Functions:
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "ih264_typedefs.h"
#include "isvc_mem_fns.h"
#include <immintrin.h>
/**
********************************************************************************
*  @brief  copies a 2d blk from one location to another
*
*  @par Description:
*  The block is copied in tiles. The tile width is the largest of 32, 16 and 8
*  bytes that divides i4_blk_wd, and 4 bytes otherwise. The tile height is 8
*  rows when i4_blk_ht is a multiple of 8, and 4 rows otherwise. Within a tile
*  all loads are issued before the first store.
*
*  @param[out] pu1_dst : dst pointer
*
*  @param[in] i4_dst_stride: stride of destination
*
*  @param[in] pu1_src : src ptr
*
*  @param[in] i4_src_stride: stride of src
*
*  @param[in] i4_blk_wd : blk width
*
*  @param[in] i4_blk_ht : blk height
*
*  @return void
*
*  @remarks Widths and heights that are not multiples of 4 are not handled
*  separately: the loops always advance by whole tiles.
********************************************************************************
*/
void isvc_copy_2d_ssse3(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src,
                        WORD32 i4_src_stride, WORD32 i4_blk_wd, WORD32 i4_blk_ht)
{
    WORD32 i, j;

    /* all 128 bit registers are named with a suffix mxnb, where m is the */
    /* number of n bits packed in the register                            */
    if(0 == (i4_blk_wd & 31)) /* wd multiple of 32 case */
    {
        __m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b;
        __m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b;

        if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */
        {
            __m128i src8_16x8b, src9_16x8b, src10_16x8b, src11_16x8b;
            __m128i src12_16x8b, src13_16x8b, src14_16x8b, src15_16x8b;

            for(i = 0; i < i4_blk_ht; i += 8)
            {
                for(j = 0; j < i4_blk_wd; j += 32)
                {
                    /* bytes 0..15 of rows 0..7 */
                    src0_16x8b = _mm_loadu_si128((__m128i *) (pu1_src));
                    src1_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride));
                    src2_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride));
                    src3_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride));
                    src4_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 4 * i4_src_stride));
                    src5_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 5 * i4_src_stride));
                    src6_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 6 * i4_src_stride));
                    src7_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 7 * i4_src_stride));

                    /* bytes 16..31 of rows 0..7 */
                    src8_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 16));
                    src9_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride + 16));
                    src10_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride + 16));
                    src11_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride + 16));
                    src12_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 4 * i4_src_stride + 16));
                    src13_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 5 * i4_src_stride + 16));
                    src14_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 6 * i4_src_stride + 16));
                    src15_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 7 * i4_src_stride + 16));

                    _mm_storeu_si128((__m128i *) (pu1_dst), src0_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride), src1_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 4 * i4_dst_stride), src4_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 5 * i4_dst_stride), src5_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 6 * i4_dst_stride), src6_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 7 * i4_dst_stride), src7_16x8b);

                    _mm_storeu_si128((__m128i *) (pu1_dst + 16), src8_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride + 16), src9_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride + 16), src10_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride + 16), src11_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 4 * i4_dst_stride + 16), src12_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 5 * i4_dst_stride + 16), src13_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 6 * i4_dst_stride + 16), src14_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 7 * i4_dst_stride + 16), src15_16x8b);

                    pu1_src += 32;
                    pu1_dst += 32;
                }

                /* back to the first column, down by one band of 8 rows */
                pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride;
                pu1_dst = pu1_dst - i4_blk_wd + 8 * i4_dst_stride;
            }
        }
        else /* ht multiple of 4 case */
        {
            for(i = 0; i < i4_blk_ht; i += 4)
            {
                for(j = 0; j < i4_blk_wd; j += 32)
                {
                    /* bytes 0..15 of rows 0..3 */
                    src0_16x8b = _mm_loadu_si128((__m128i *) (pu1_src));
                    src1_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride));
                    src2_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride));
                    src3_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride));

                    /* bytes 16..31 of rows 0..3 */
                    src4_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 16));
                    src5_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride + 16));
                    src6_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride + 16));
                    src7_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride + 16));

                    _mm_storeu_si128((__m128i *) (pu1_dst), src0_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride), src1_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);

                    _mm_storeu_si128((__m128i *) (pu1_dst + 16), src4_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride + 16), src5_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride + 16), src6_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride + 16), src7_16x8b);

                    pu1_src += 32;
                    pu1_dst += 32;
                }

                /* back to the first column, down by one band of 4 rows */
                pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride;
                pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride;
            }
        }
    }
    else if(0 == (i4_blk_wd & 15)) /* wd multiple of 16 case */
    {
        __m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b;

        if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */
        {
            __m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b;

            for(i = 0; i < i4_blk_ht; i += 8)
            {
                for(j = 0; j < i4_blk_wd; j += 16)
                {
                    src0_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 0 * i4_src_stride));
                    src1_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 1 * i4_src_stride));
                    src2_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride));
                    src3_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride));
                    src4_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 4 * i4_src_stride));
                    src5_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 5 * i4_src_stride));
                    src6_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 6 * i4_src_stride));
                    src7_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 7 * i4_src_stride));

                    _mm_storeu_si128((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 4 * i4_dst_stride), src4_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 5 * i4_dst_stride), src5_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 6 * i4_dst_stride), src6_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 7 * i4_dst_stride), src7_16x8b);

                    pu1_src += 16;
                    pu1_dst += 16;
                }

                pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride;
                pu1_dst = pu1_dst - i4_blk_wd + 8 * i4_dst_stride;
            }
        }
        else /* ht multiple of 4 case */
        {
            for(i = 0; i < i4_blk_ht; i += 4)
            {
                for(j = 0; j < i4_blk_wd; j += 16)
                {
                    src0_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 0 * i4_src_stride));
                    src1_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 1 * i4_src_stride));
                    src2_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride));
                    src3_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride));

                    _mm_storeu_si128((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
                    _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);

                    pu1_src += 16;
                    pu1_dst += 16;
                }

                pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride;
                pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride;
            }
        }
    }
    else if(0 == (i4_blk_wd & 7)) /* wd multiple of 8 case */
    {
        __m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b;

        if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */
        {
            __m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b;

            for(i = 0; i < i4_blk_ht; i += 8)
            {
                for(j = 0; j < i4_blk_wd; j += 8)
                {
                    src0_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 0 * i4_src_stride));
                    src1_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 1 * i4_src_stride));
                    src2_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 2 * i4_src_stride));
                    src3_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 3 * i4_src_stride));
                    src4_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 4 * i4_src_stride));
                    src5_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 5 * i4_src_stride));
                    src6_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 6 * i4_src_stride));
                    src7_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 7 * i4_src_stride));

                    _mm_storel_epi64((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b);
                    _mm_storel_epi64((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b);
                    _mm_storel_epi64((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
                    _mm_storel_epi64((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);
                    _mm_storel_epi64((__m128i *) (pu1_dst + 4 * i4_dst_stride), src4_16x8b);
                    _mm_storel_epi64((__m128i *) (pu1_dst + 5 * i4_dst_stride), src5_16x8b);
                    _mm_storel_epi64((__m128i *) (pu1_dst + 6 * i4_dst_stride), src6_16x8b);
                    _mm_storel_epi64((__m128i *) (pu1_dst + 7 * i4_dst_stride), src7_16x8b);

                    pu1_src += 8;
                    pu1_dst += 8;
                }

                pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride;
                pu1_dst = pu1_dst - i4_blk_wd + 8 * i4_dst_stride;
            }
        }
        else /* ht multiple of 4 case */
        {
            for(i = 0; i < i4_blk_ht; i += 4)
            {
                for(j = 0; j < i4_blk_wd; j += 8)
                {
                    src0_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 0 * i4_src_stride));
                    src1_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 1 * i4_src_stride));
                    src2_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 2 * i4_src_stride));
                    src3_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + 3 * i4_src_stride));

                    _mm_storel_epi64((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b);
                    _mm_storel_epi64((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b);
                    _mm_storel_epi64((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
                    _mm_storel_epi64((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);

                    pu1_src += 8;
                    pu1_dst += 8;
                }

                pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride;
                pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride;
            }
        }
    }
    else /* wd multiple of 4 case */
    {
        /* Every 4 byte group travels through a WORD32 temporary via memcpy(). */
        /* An 8 byte _mm_loadl_epi64() would read 4 bytes beyond the block on  */
        /* its last column, and a store through a (WORD32 *) cast would assume */
        /* an alignment that pu1_dst is not guaranteed to have                 */
        WORD32 ai4_rows[8];
        WORD32 k;
        /* rows moved per tile: 8 when ht is a multiple of 8, otherwise 4 */
        const WORD32 i4_rows_per_tile = (0 == (i4_blk_ht & 7)) ? 8 : 4;

        for(i = 0; i < i4_blk_ht; i += i4_rows_per_tile)
        {
            for(j = 0; j < i4_blk_wd; j += 4)
            {
                /* as in the wider paths, read the whole tile before writing */
                for(k = 0; k < i4_rows_per_tile; k++)
                {
                    memcpy(&ai4_rows[k], pu1_src + k * i4_src_stride, sizeof(ai4_rows[k]));
                }

                for(k = 0; k < i4_rows_per_tile; k++)
                {
                    memcpy(pu1_dst + k * i4_dst_stride, &ai4_rows[k], sizeof(ai4_rows[k]));
                }

                pu1_src += 4;
                pu1_dst += 4;
            }

            pu1_src = pu1_src - i4_blk_wd + i4_rows_per_tile * i4_src_stride;
            pu1_dst = pu1_dst - i4_blk_wd + i4_rows_per_tile * i4_dst_stride;
        }
    }
}

View file

@ -0,0 +1,294 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* ih264_padding_atom_intr.c
*
* @brief
* Contains function definitions for Padding
*
* @author
* Srinivas T
*
* @par List of Functions:
* - isvc_pad_left_luma_ssse3()
* - isvc_pad_left_chroma_ssse3()
* - isvc_pad_right_luma_ssse3()
* - isvc_pad_right_chroma_ssse3()
*
* @remarks
* None
*
*******************************************************************************
*/
#include <string.h>
#include <assert.h>
#include "ih264_typedefs.h"
#include "ih264_platform_macros.h"
#include "isvc_mem_fns.h"
#include "ih264_debug.h"
#include <immintrin.h>
/**
*******************************************************************************
*
* @brief
*  Padding (luma block) at the left of a 2d array
*
* @par Description:
*  For each of the ht rows, the byte at the start of the row is replicated
*  into the pad_size bytes that immediately precede it
*
* @param[in,out] pu1_src
*  UWORD8 pointer to the first byte of the first row; the padding is written
*  in front of it
*
* @param[in] src_strd
*  integer source stride
*
* @param[in] ht
*  integer height of the array (number of rows padded)
*
* @param[in] pad_size
*  integer padding size in bytes; asserted to be a multiple of 8
*
* @returns none
*
* @remarks
*  The padding is written in units of 8 bytes
*
*******************************************************************************
*/
void isvc_pad_left_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
{
    UWORD8 *pu1_row_start = pu1_src;
    WORD32 i4_rows_left;
    WORD32 i4_bytes_left;

    ASSERT(pad_size % 8 == 0);

    for(i4_rows_left = ht; i4_rows_left > 0; i4_rows_left--)
    {
        /* first byte of the row, replicated into every byte lane */
        const __m128i edge_16x8b = _mm_set1_epi8(*pu1_row_start);

        /* walk from the far end of the padding towards the row start */
        for(i4_bytes_left = pad_size; i4_bytes_left > 0; i4_bytes_left -= 8)
        {
            _mm_storel_epi64((__m128i *) (pu1_row_start - i4_bytes_left), edge_16x8b);
        }

        pu1_row_start += src_strd;
    }
}
/**
*******************************************************************************
*
* @brief
*  Padding (chroma block) at the left of a 2d array
*
* @par Description:
*  For each of the ht rows, the two bytes at the start of the row are
*  replicated, as a pair, into the pad_size bytes that immediately precede them
*
* @param[in,out] pu1_src
*  UWORD8 pointer to the first byte of the first row; the padding is written
*  in front of it
*
* @param[in] src_strd
*  integer source stride
*
* @param[in] ht
*  integer height of the array (number of rows padded)
*
* @param[in] pad_size
*  integer padding size in bytes; asserted to be a multiple of 8
*
* @returns none
*
* @remarks
*  The padding is written in units of 8 bytes, i.e. four byte pairs
*
*******************************************************************************
*/
void isvc_pad_left_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
{
    WORD32 row;
    WORD32 col;
    UWORD8 *pu1_dst;

    ASSERT(pad_size % 8 == 0);

    for(row = 0; row < ht; row++)
    {
        __m128i src_temp0_16x8b;
        UWORD16 u2_edge_pair;

        pu1_dst = pu1_src - pad_size;

        /* The byte pair is fetched with memcpy() instead of dereferencing a */
        /* (UWORD16 *) cast: pu1_src is only known to be byte aligned        */
        memcpy(&u2_edge_pair, pu1_src, sizeof(u2_edge_pair));
        src_temp0_16x8b = _mm_set1_epi16(u2_edge_pair);

        for(col = 0; col < pad_size; col += 8)
        {
            _mm_storel_epi64((__m128i *) (pu1_dst + col), src_temp0_16x8b);
        }

        pu1_src += src_strd;
    }
}
/**
*******************************************************************************
*
* @brief
*  Padding (luma block) at the right of a 2d array
*
* @par Description:
*  For each of the ht rows, the byte just before pu1_src is replicated into
*  the pad_size bytes that start at pu1_src
*
* @param[in,out] pu1_src
*  UWORD8 pointer to the first byte to be padded in the first row, i.e. one
*  byte past the byte that is replicated
*
* @param[in] src_strd
*  integer source stride
*
* @param[in] ht
*  integer height of the array (number of rows padded)
*
* @param[in] pad_size
*  integer padding size in bytes; asserted to be a multiple of 8
*
* @returns none
*
* @remarks
*  The padding is written in units of 8 bytes
*
*******************************************************************************
*/
void isvc_pad_right_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
{
    UWORD8 *pu1_row_end = pu1_src;
    WORD32 i4_rows_left;

    ASSERT(pad_size % 8 == 0);

    for(i4_rows_left = ht; i4_rows_left > 0; i4_rows_left--)
    {
        /* last byte of the row, replicated into every byte lane */
        const __m128i edge_16x8b = _mm_set1_epi8(pu1_row_end[-1]);
        UWORD8 *pu1_pad = pu1_row_end;
        WORD32 i4_bytes_left;

        /* fill forwards from the end of the row, 8 bytes per store */
        for(i4_bytes_left = pad_size; i4_bytes_left > 0; i4_bytes_left -= 8)
        {
            _mm_storel_epi64((__m128i *) pu1_pad, edge_16x8b);
            pu1_pad += 8;
        }

        pu1_row_end += src_strd;
    }
}
/**
*******************************************************************************
*
* @brief
*  Padding (chroma block) at the right of a 2d array
*
* @par Description:
*  For each of the ht rows, the two bytes just before pu1_src are replicated,
*  as a pair, into the pad_size bytes that start at pu1_src
*
* @param[in,out] pu1_src
*  UWORD8 pointer to the first byte to be padded in the first row, i.e. two
*  bytes past the start of the pair that is replicated
*
* @param[in] src_strd
*  integer source stride
*
* @param[in] ht
*  integer height of the array (number of rows padded)
*
* @param[in] pad_size
*  integer padding size in bytes; asserted to be a multiple of 8
*
* @returns none
*
* @remarks
*  The padding is written in units of 8 bytes, i.e. four byte pairs
*
*******************************************************************************
*/
void isvc_pad_right_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
{
    WORD32 row;
    WORD32 col;
    UWORD8 *pu1_dst;

    ASSERT(pad_size % 8 == 0);

    for(row = 0; row < ht; row++)
    {
        __m128i src_temp0_16x8b;
        UWORD16 u2_edge_pair;

        pu1_dst = pu1_src;

        /* The byte pair is fetched with memcpy() instead of dereferencing a */
        /* (UWORD16 *) cast: pu1_src is only known to be byte aligned        */
        memcpy(&u2_edge_pair, pu1_src - 2, sizeof(u2_edge_pair));
        src_temp0_16x8b = _mm_set1_epi16(u2_edge_pair);

        for(col = 0; col < pad_size; col += 8)
        {
            _mm_storel_epi64((__m128i *) (pu1_dst + col), src_temp0_16x8b);
        }

        pu1_src += src_strd;
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,927 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file ih264e_downscaler_neon.c
*
* @brief
* This file contains the ARMV8 SIMD version of the function which does
* horizontal scaling and transpose
*
* @author
* Ittiam
*
* @par List of Functions:
* - isvce_horizontal_downscale_and_transpose_neon()
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System include files */
#include <stdio.h>
#include <stdlib.h>
#include <arm_neon.h>
/* User include files */
#include "ih264_typedefs.h"
#include "ih264_macros.h"
#include "ih264_platform_macros.h"
#include "isvc_defs.h"
#include "isvce_defs.h"
#include "isvc_structs.h"
#include "isvce_downscaler_private_defs.h"
/**
*******************************************************************************
*
* @brief
*  Widens eight source samples to 16 bits, multiplies them with the eight
*  filter taps and adds adjacent products pairwise
*
* @param[in] reg_8x8_src
*  Eight consecutive 8-bit source samples
*
* @param[in] reg_16x8_filt_coeff_grid
*  The eight filter taps, already widened to 16 bits
*
* @returns
*  Four 32-bit partial sums; their total is the filtered (unrounded) value
*
*******************************************************************************
*/
static inline int32x4_t isvce_downscaler_neon_row_partial_sums(uint8x8_t reg_8x8_src,
                                                               int16x8_t reg_16x8_filt_coeff_grid)
{
    int16x8_t reg_16x8_src = vreinterpretq_s16_u16(vmovl_u8(reg_8x8_src));

    return vpaddlq_s16(vmulq_s16(reg_16x8_src, reg_16x8_filt_coeff_grid));
}

/**
*******************************************************************************
*
* @brief
*  Horizontally reduces four vectors of partial sums
*
* @returns
*  { sum(a), sum(b), sum(c), sum(d) }, where sum() adds the four lanes
*
*******************************************************************************
*/
static inline int32x4_t isvce_downscaler_neon_reduce_4rows(int32x4_t reg_32x4_a,
                                                           int32x4_t reg_32x4_b,
                                                           int32x4_t reg_32x4_c,
                                                           int32x4_t reg_32x4_d)
{
    /* Unzip + add folds each input from four lanes down to two ... */
    int32x4x2_t reg_32x4x2_ab = vuzpq_s32(reg_32x4_a, reg_32x4_b);
    int32x4x2_t reg_32x4x2_cd = vuzpq_s32(reg_32x4_c, reg_32x4_d);
    int32x4_t reg_32x4_ab = vaddq_s32(reg_32x4x2_ab.val[0], reg_32x4x2_ab.val[1]);
    int32x4_t reg_32x4_cd = vaddq_s32(reg_32x4x2_cd.val[0], reg_32x4x2_cd.val[1]);
    /* ... and a second unzip + add folds two lanes down to one */
    int32x4x2_t reg_32x4x2_abcd = vuzpq_s32(reg_32x4_ab, reg_32x4_cd);

    return vaddq_s32(reg_32x4x2_abcd.val[0], reg_32x4x2_abcd.val[1]);
}

/**
*******************************************************************************
*
* @brief
*  Rounds eight 32-bit filtered sums (right shift by 7) and narrows them to
*  8 bits with unsigned saturation
*
* @remarks
*  Every code path of the downscaler narrows through this helper so that
*  results below 0 or above 255 are clamped identically, whichever row of the
*  block they belong to
*
*******************************************************************************
*/
static inline uint8x8_t isvce_downscaler_neon_round_sat_u8(int32x4_t reg_32x4_sum_lo,
                                                           int32x4_t reg_32x4_sum_hi)
{
    uint16x4_t reg_16x4_lo = vqrshrun_n_s32(reg_32x4_sum_lo, 7);
    uint16x4_t reg_16x4_hi = vqrshrun_n_s32(reg_32x4_sum_hi, 7);

    return vqmovn_u16(vcombine_u16(reg_16x4_lo, reg_16x4_hi));
}

/**
*******************************************************************************
*
* @brief
*  Filters one output sample for each of four consecutive luma rows
*
* @param[in] pu1_in
*  First of the eight source samples covered by the filter in the top row
*
* @param[in] u4_stride
*  Distance in bytes between vertically adjacent source samples
*
* @param[in] reg_16x8_filt_coeff_grid
*  The eight filter taps, widened to 16 bits
*
* @returns
*  Unrounded 32-bit filtered sums of rows 0..3
*
*******************************************************************************
*/
static inline int32x4_t isvce_downscaler_neon_luma_4rows(const UWORD8 *pu1_in, UWORD32 u4_stride,
                                                         int16x8_t reg_16x8_filt_coeff_grid)
{
    int32x4_t reg_32x4_sum_r0 =
        isvce_downscaler_neon_row_partial_sums(vld1_u8(pu1_in), reg_16x8_filt_coeff_grid);
    int32x4_t reg_32x4_sum_r1 = isvce_downscaler_neon_row_partial_sums(
        vld1_u8(pu1_in + u4_stride), reg_16x8_filt_coeff_grid);
    int32x4_t reg_32x4_sum_r2 = isvce_downscaler_neon_row_partial_sums(
        vld1_u8(pu1_in + 2 * u4_stride), reg_16x8_filt_coeff_grid);
    int32x4_t reg_32x4_sum_r3 = isvce_downscaler_neon_row_partial_sums(
        vld1_u8(pu1_in + 3 * u4_stride), reg_16x8_filt_coeff_grid);

    return isvce_downscaler_neon_reduce_4rows(reg_32x4_sum_r0, reg_32x4_sum_r1, reg_32x4_sum_r2,
                                              reg_32x4_sum_r3);
}

/**
*******************************************************************************
*
* @brief
*  Filters one output sample for each of eight consecutive luma rows and
*  returns them rounded and saturated to 8 bits
*
*******************************************************************************
*/
static inline uint8x8_t isvce_downscaler_neon_luma_8rows(const UWORD8 *pu1_in, UWORD32 u4_stride,
                                                         int16x8_t reg_16x8_filt_coeff_grid)
{
    int32x4_t reg_32x4_sum_r0_r3 =
        isvce_downscaler_neon_luma_4rows(pu1_in, u4_stride, reg_16x8_filt_coeff_grid);
    int32x4_t reg_32x4_sum_r4_r7 = isvce_downscaler_neon_luma_4rows(
        pu1_in + 4 * u4_stride, u4_stride, reg_16x8_filt_coeff_grid);

    return isvce_downscaler_neon_round_sat_u8(reg_32x4_sum_r0_r3, reg_32x4_sum_r4_r7);
}

/**
*******************************************************************************
*
* @brief
*  Filters one Cb and one Cr output sample for each of four consecutive rows
*  of byte-interleaved chroma
*
* @param[in] pu1_in
*  First byte of the 16 interleaved source bytes covered by the filter in the
*  top row; even bytes are treated as Cb and odd bytes as Cr
*
* @param[in] u4_stride
*  Distance in bytes between vertically adjacent source samples
*
* @param[in] reg_16x8_filt_coeff_grid
*  The eight filter taps, widened to 16 bits
*
* @param[out] preg_32x4_sum_cb
*  Unrounded 32-bit filtered Cb sums of rows 0..3
*
* @param[out] preg_32x4_sum_cr
*  Unrounded 32-bit filtered Cr sums of rows 0..3
*
*******************************************************************************
*/
static inline void isvce_downscaler_neon_chroma_4rows(const UWORD8 *pu1_in, UWORD32 u4_stride,
                                                      int16x8_t reg_16x8_filt_coeff_grid,
                                                      int32x4_t *preg_32x4_sum_cb,
                                                      int32x4_t *preg_32x4_sum_cr)
{
    /* vld2 reads 16 bytes and splits them into even (val[0]) and odd (val[1]) bytes */
    uint8x8x2_t reg_8x8x2_src_r0 = vld2_u8(pu1_in);
    uint8x8x2_t reg_8x8x2_src_r1 = vld2_u8(pu1_in + u4_stride);
    uint8x8x2_t reg_8x8x2_src_r2 = vld2_u8(pu1_in + 2 * u4_stride);
    uint8x8x2_t reg_8x8x2_src_r3 = vld2_u8(pu1_in + 3 * u4_stride);

    *preg_32x4_sum_cb = isvce_downscaler_neon_reduce_4rows(
        isvce_downscaler_neon_row_partial_sums(reg_8x8x2_src_r0.val[0], reg_16x8_filt_coeff_grid),
        isvce_downscaler_neon_row_partial_sums(reg_8x8x2_src_r1.val[0], reg_16x8_filt_coeff_grid),
        isvce_downscaler_neon_row_partial_sums(reg_8x8x2_src_r2.val[0], reg_16x8_filt_coeff_grid),
        isvce_downscaler_neon_row_partial_sums(reg_8x8x2_src_r3.val[0], reg_16x8_filt_coeff_grid));

    *preg_32x4_sum_cr = isvce_downscaler_neon_reduce_4rows(
        isvce_downscaler_neon_row_partial_sums(reg_8x8x2_src_r0.val[1], reg_16x8_filt_coeff_grid),
        isvce_downscaler_neon_row_partial_sums(reg_8x8x2_src_r1.val[1], reg_16x8_filt_coeff_grid),
        isvce_downscaler_neon_row_partial_sums(reg_8x8x2_src_r2.val[1], reg_16x8_filt_coeff_grid),
        isvce_downscaler_neon_row_partial_sums(reg_8x8x2_src_r3.val[1], reg_16x8_filt_coeff_grid));
}

/**
*******************************************************************************
*
* @brief
*  NEON implementation of horizontal 8-tap polyphase downscaling with the
*  result written transposed
*
* @par Description:
*  For every output column 'i' the filter phase and the integer source
*  position are derived from a running fixed point position that starts at
*  the scaler state's init offset and advances by the horizontal increment.
*  The eight taps of that phase are applied to each source row, and the
*  results of successive source rows are stored at successive byte addresses
*  of the destination, i.e. source rows become destination columns.
*  Luma is processed in bands of 16, 8 and finally single rows. Chroma is
*  read as byte-interleaved Cb/Cr, processed in bands of 8, 4 and single rows,
*  and Cb and Cr are written to two destination rows that are u4_out_stride
*  apart.
*
* @param[in] ps_scaler
*  Downscaler context; pv_scaler_state supplies the init offset and the
*  horizontal / vertical increments
*
* @param[in] ps_src
*  Source buffer (pv_data, i4_data_stride)
*
* @param[out] ps_dst
*  Destination buffer (pv_data, i4_data_stride) receiving the transposed
*  output
*
* @param[in] pai1_filters
*  Filter table indexed by phase; eight signed 8-bit taps are read per phase
*
* @param[in] u4_blk_wd
*  Number of output columns to produce
*
* @param[in] u4_blk_ht
*  Number of source rows to process
*
* @param[in] u1_is_chroma
*  0 for luma; non-zero for interleaved chroma (used as a shift count, so
*  presumably always 0 or 1 - confirm against callers)
*
* @remarks
*  Only a vertical increment of (1 << DOWNSCALER_Q) is supported (asserted).
*  All paths round by 7 bits and saturate to [0, 255].
*
*******************************************************************************
*/
void isvce_horizontal_downscale_and_transpose_neon(
    downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst,
    FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma)
{
    WORD32 i, j;
    UWORD8 u1_phase;
    UWORD8 *pu1_src_j, *pu1_dst_j;
    UWORD8 *pu1_in_pixel;
    UWORD8 *pu1_out_pixel;
    WORD8 *pi1_filter_grid;
    UWORD16 u2_full_pixel_inc;
    UWORD32 u4_num_iterations_vertical_by_16, u4_num_iterations_vertical_by_8;
    UWORD32 u4_rem_vert_loop_by_8, u4_rem_vert_loop_by_4;
    UWORD32 u4_rem_vert_loop;
    UWORD32 u4_height_finished;

    int16x8_t reg_16x8_filt_coeff_grid;
    int32x4_t reg_32x4_sum;
    int32x4_t reg_32x4_sum_cb_lo, reg_32x4_sum_cb_hi;
    int32x4_t reg_32x4_sum_cr_lo, reg_32x4_sum_cr_hi;
    uint8x8_t reg_8x8_sum_r0_r7, reg_8x8_sum_r8_r15;
    uint8x8_t reg_8x8_sum;
    uint8x8x2_t reg_8x8x2_src;
    int32x4_t reg_32x4_zero = vdupq_n_s32(0);

    downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;

    UWORD32 u4_center_pixel_pos = ps_scaler_state->i4_init_offset;
    UWORD32 u4_src_vert_increments = ps_scaler_state->u4_vert_increment;
    UWORD32 u4_src_horz_increments = ps_scaler_state->u4_horz_increment;
    UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
    UWORD32 u4_in_stride = ps_src->i4_data_stride;
    UWORD8 *pu1_dst = (UWORD8 *) ps_dst->pv_data;
    UWORD32 u4_out_stride = ps_dst->i4_data_stride;
    UWORD32 u4_center_pixel_pos_src = u4_center_pixel_pos;

    /* Offset the input so that the input pixel to be processed
    co-incides with the centre of filter (4th coefficient)*/
    pu1_src += (1 + u1_is_chroma);

    ASSERT((1 << DOWNSCALER_Q) == u4_src_vert_increments);

    if(!u1_is_chroma)
    {
        u4_num_iterations_vertical_by_16 = u4_blk_ht >> 4;
        u4_rem_vert_loop = u4_blk_ht % 16;

        /* Bands of 16 source rows -> 16 contiguous output bytes per column */
        for(j = 0; j < (WORD32) u4_num_iterations_vertical_by_16; j++)
        {
            pu1_src_j = pu1_src + ((j << 4) * u4_in_stride);
            pu1_dst_j = pu1_dst + (j << 4);
            u4_center_pixel_pos = u4_center_pixel_pos_src;

            for(i = 0; i < (WORD32) u4_blk_wd; i++)
            {
                u1_phase = get_filter_phase(u4_center_pixel_pos);
                pi1_filter_grid = pai1_filters[u1_phase];
                u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
                pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
                pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
                reg_16x8_filt_coeff_grid = vmovl_s8(vld1_s8(pi1_filter_grid));

                reg_8x8_sum_r0_r7 = isvce_downscaler_neon_luma_8rows(pu1_in_pixel, u4_in_stride,
                                                                     reg_16x8_filt_coeff_grid);
                reg_8x8_sum_r8_r15 = isvce_downscaler_neon_luma_8rows(
                    pu1_in_pixel + 8 * u4_in_stride, u4_in_stride, reg_16x8_filt_coeff_grid);

                vst1q_u8(pu1_out_pixel, vcombine_u8(reg_8x8_sum_r0_r7, reg_8x8_sum_r8_r15));

                /* Update the context for next Loop Count */
                u4_center_pixel_pos += u4_src_horz_increments;
            }
        }

        /* Loop for the remaining height less than 16 */
        if(u4_rem_vert_loop)
        {
            u4_rem_vert_loop_by_8 = u4_rem_vert_loop >> 3;
            u4_rem_vert_loop = u4_rem_vert_loop % 8;
            u4_height_finished = (u4_num_iterations_vertical_by_16 << 4);

            /* 8 <= remaining height < 16 : exactly one band of 8 rows */
            if(u4_rem_vert_loop_by_8)
            {
                pu1_src_j = pu1_src + (u4_height_finished * u4_in_stride);
                pu1_dst_j = pu1_dst + u4_height_finished;
                u4_center_pixel_pos = u4_center_pixel_pos_src;

                for(i = 0; i < (WORD32) u4_blk_wd; i++)
                {
                    u1_phase = get_filter_phase(u4_center_pixel_pos);
                    pi1_filter_grid = pai1_filters[u1_phase];
                    u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
                    pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
                    pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
                    reg_16x8_filt_coeff_grid = vmovl_s8(vld1_s8(pi1_filter_grid));

                    reg_8x8_sum_r0_r7 = isvce_downscaler_neon_luma_8rows(
                        pu1_in_pixel, u4_in_stride, reg_16x8_filt_coeff_grid);

                    vst1_u8(pu1_out_pixel, reg_8x8_sum_r0_r7);

                    /* Update the context for next Loop Count */
                    u4_center_pixel_pos += u4_src_horz_increments;
                }
            }

            /* 1 <= remaining height < 8 : one row at a time */
            if(u4_rem_vert_loop)
            {
                u4_height_finished += (u4_rem_vert_loop_by_8 << 3);
                pu1_src_j = pu1_src + u4_height_finished * u4_in_stride;
                pu1_dst_j = pu1_dst + u4_height_finished;
                u4_center_pixel_pos = u4_center_pixel_pos_src;

                for(i = 0; i < (WORD32) u4_blk_wd; i++)
                {
                    u1_phase = get_filter_phase(u4_center_pixel_pos);
                    pi1_filter_grid = pai1_filters[u1_phase];
                    u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
                    pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
                    pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
                    reg_16x8_filt_coeff_grid = vmovl_s8(vld1_s8(pi1_filter_grid));

                    for(j = u4_rem_vert_loop; j > 0; j--)
                    {
                        /* Lane 0 carries the filtered sum, other lanes are 0 */
                        reg_32x4_sum = isvce_downscaler_neon_reduce_4rows(
                            isvce_downscaler_neon_row_partial_sums(vld1_u8(pu1_in_pixel),
                                                                   reg_16x8_filt_coeff_grid),
                            reg_32x4_zero, reg_32x4_zero, reg_32x4_zero);

                        /* Saturating narrow, same clamping as the 16/8 row paths */
                        reg_8x8_sum =
                            isvce_downscaler_neon_round_sat_u8(reg_32x4_sum, reg_32x4_zero);

                        vst1_lane_u8(pu1_out_pixel, reg_8x8_sum, 0);

                        pu1_out_pixel += 1;
                        pu1_in_pixel += (u4_src_vert_increments * u4_in_stride) >> DOWNSCALER_Q;
                    }

                    /* Update the context for next Loop Count */
                    u4_center_pixel_pos += u4_src_horz_increments;
                }
            }
        }
    }
    /* for chroma */
    else
    {
        u4_num_iterations_vertical_by_8 = u4_blk_ht >> 3;
        u4_rem_vert_loop = u4_blk_ht % 8;

        /* Bands of 8 source rows -> 8 contiguous Cb bytes and 8 contiguous Cr bytes */
        for(j = 0; j < (WORD32) u4_num_iterations_vertical_by_8; j++)
        {
            pu1_src_j = pu1_src + ((j << 3) * u4_in_stride);
            pu1_dst_j = pu1_dst + (j << 3);
            u4_center_pixel_pos = u4_center_pixel_pos_src;

            for(i = 0; i < (WORD32) u4_blk_wd; i++)
            {
                u1_phase = get_filter_phase(u4_center_pixel_pos);
                pi1_filter_grid = pai1_filters[u1_phase];
                u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
                pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
                pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
                reg_16x8_filt_coeff_grid = vmovl_s8(vld1_s8(pi1_filter_grid));

                isvce_downscaler_neon_chroma_4rows(pu1_in_pixel, u4_in_stride,
                                                   reg_16x8_filt_coeff_grid, &reg_32x4_sum_cb_lo,
                                                   &reg_32x4_sum_cr_lo);
                isvce_downscaler_neon_chroma_4rows(pu1_in_pixel + 4 * u4_in_stride, u4_in_stride,
                                                   reg_16x8_filt_coeff_grid, &reg_32x4_sum_cb_hi,
                                                   &reg_32x4_sum_cr_hi);

                vst1_u8(pu1_out_pixel,
                        isvce_downscaler_neon_round_sat_u8(reg_32x4_sum_cb_lo, reg_32x4_sum_cb_hi));
                vst1_u8(pu1_out_pixel + u4_out_stride,
                        isvce_downscaler_neon_round_sat_u8(reg_32x4_sum_cr_lo, reg_32x4_sum_cr_hi));

                /* Update the context for next Loop Count */
                u4_center_pixel_pos += u4_src_horz_increments;
            }
        }

        /* Loop for the remaining height less than 8 */
        if(u4_rem_vert_loop)
        {
            u4_rem_vert_loop_by_4 = u4_rem_vert_loop >> 2;
            u4_rem_vert_loop = u4_rem_vert_loop % 4;
            u4_height_finished = (u4_num_iterations_vertical_by_8 << 3);

            /* 4 <= remaining height < 8 : exactly one band of 4 rows */
            if(u4_rem_vert_loop_by_4)
            {
                pu1_src_j = pu1_src + (u4_height_finished * u4_in_stride);
                pu1_dst_j = pu1_dst + u4_height_finished;
                u4_center_pixel_pos = u4_center_pixel_pos_src;

                for(i = 0; i < (WORD32) u4_blk_wd; i++)
                {
                    u1_phase = get_filter_phase(u4_center_pixel_pos);
                    pi1_filter_grid = pai1_filters[u1_phase];
                    u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
                    pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
                    pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
                    reg_16x8_filt_coeff_grid = vmovl_s8(vld1_s8(pi1_filter_grid));

                    isvce_downscaler_neon_chroma_4rows(pu1_in_pixel, u4_in_stride,
                                                       reg_16x8_filt_coeff_grid,
                                                       &reg_32x4_sum_cb_lo, &reg_32x4_sum_cr_lo);

                    /* Bytes 0..3 hold Cb, bytes 4..7 hold Cr; saturating narrow, */
                    /* same clamping as the 8 row path                            */
                    reg_8x8_sum =
                        isvce_downscaler_neon_round_sat_u8(reg_32x4_sum_cb_lo, reg_32x4_sum_cr_lo);

                    vst1_lane_u32((uint32_t *) (pu1_out_pixel), vreinterpret_u32_u8(reg_8x8_sum),
                                  0);
                    vst1_lane_u32((uint32_t *) (pu1_out_pixel + u4_out_stride),
                                  vreinterpret_u32_u8(reg_8x8_sum), 1);

                    /* Update the context for next Loop Count */
                    u4_center_pixel_pos += u4_src_horz_increments;
                }
            }

            /* 1 <= remaining height < 4 : one row at a time */
            if(u4_rem_vert_loop)
            {
                u4_height_finished += (u4_rem_vert_loop_by_4 << 2);
                pu1_src_j = pu1_src + u4_height_finished * u4_in_stride;
                pu1_dst_j = pu1_dst + u4_height_finished;
                u4_center_pixel_pos = u4_center_pixel_pos_src;

                for(i = 0; i < (WORD32) u4_blk_wd; i++)
                {
                    u1_phase = get_filter_phase(u4_center_pixel_pos);
                    pi1_filter_grid = pai1_filters[u1_phase];
                    u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
                    pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
                    pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
                    reg_16x8_filt_coeff_grid = vmovl_s8(vld1_s8(pi1_filter_grid));

                    for(j = u4_rem_vert_loop; j > 0; j--)
                    {
                        reg_8x8x2_src = vld2_u8(pu1_in_pixel);

                        /* Lane 0 carries the Cb sum, lane 1 the Cr sum */
                        reg_32x4_sum = isvce_downscaler_neon_reduce_4rows(
                            isvce_downscaler_neon_row_partial_sums(reg_8x8x2_src.val[0],
                                                                   reg_16x8_filt_coeff_grid),
                            isvce_downscaler_neon_row_partial_sums(reg_8x8x2_src.val[1],
                                                                   reg_16x8_filt_coeff_grid),
                            reg_32x4_zero, reg_32x4_zero);

                        /* Saturating narrow, same clamping as the 8 row path */
                        reg_8x8_sum =
                            isvce_downscaler_neon_round_sat_u8(reg_32x4_sum, reg_32x4_zero);

                        vst1_lane_u8(pu1_out_pixel, reg_8x8_sum, 0);
                        vst1_lane_u8(pu1_out_pixel + u4_out_stride, reg_8x8_sum, 1);

                        pu1_out_pixel += 1;
                        pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride)) >> DOWNSCALER_Q;
                    }

                    /* Update the context for next Loop Count */
                    u4_center_pixel_pos += u4_src_horz_increments;
                }
            }
        }
    }
}

View file

@ -0,0 +1,157 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_function_selector.c
*
* @brief
* Contains functions to initialize function pointers used in h264
*
* @author
* Ittiam
*
* @par List of Functions:
* - isvce_init_function_ptr
* - isvce_default_arch
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System Include Files */
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* User Include Files */
#include "ih264_typedefs.h"
#include "iv2.h"
#include "ive2.h"
#include "isvc_defs.h"
#include "ih264_size_defs.h"
#include "isvce_defs.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
#include "ime_defs.h"
#include "ime_structs.h"
#include "ih264_error.h"
#include "isvc_structs.h"
#include "isvc_trans_quant_itrans_iquant.h"
#include "isvc_inter_pred_filters.h"
#include "isvc_mem_fns.h"
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "isvc_cabac_tables.h"
#include "isvc_macros.h"
#include "ih264_platform_macros.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "isvce_rate_control.h"
#include "isvce_cabac_structs.h"
#include "isvce_structs.h"
#include "isvce_cabac.h"
#include "ih264e_platform_macros.h"
#include "isvce_platform_macros.h"
/**
*******************************************************************************
*
* @brief Run the function pointer initializers of the codec context for the
* configured architecture
*
* @par Description: The generic initializer is always run first. Unless the
* configured architecture (s_cfg.e_arch) is ARCH_X86_GENERIC, the NEON
* initializer that was compiled in is then run on the same context: the av8
* one when ARMV8 is defined, otherwise the a9q one unless DISABLE_NEON is
* defined. When DISABLE_NEON is defined on a non-ARMV8 build, only the generic
* initializer runs.
*
* @param[in] pv_codec
* Codec context pointer; used as an isvce_codec_t *
*
* @returns none
*
* @remarks Every architecture value other than ARCH_X86_GENERIC selects the
* compiled-in NEON initializer, including values that are not ARM ones.
*
*******************************************************************************
*/
void isvce_init_function_ptr(void *pv_codec)
{
    isvce_codec_t *ps_codec = (isvce_codec_t *) pv_codec;

    /* Run the generic initializer unconditionally */
    isvce_init_function_ptr_generic(ps_codec);

#if defined(ARMV8)
    /* ARCH_X86_GENERIC is the only value that opts out of NEON */
    if(ARCH_X86_GENERIC != ps_codec->s_cfg.e_arch)
    {
        isvce_init_function_ptr_neon_av8(ps_codec);
    }
#elif !defined(DISABLE_NEON)
    /* ARCH_X86_GENERIC is the only value that opts out of NEON */
    if(ARCH_X86_GENERIC != ps_codec->s_cfg.e_arch)
    {
        isvce_init_function_ptr_neon_a9q(ps_codec);
    }
#endif
}
/**
*******************************************************************************
*
* @brief Report the default architecture of this encoder build
*
* @par Description: The value is fixed at compile time from the ARMV8 and
* DISABLE_NEON macros; nothing is detected at run time.
*
* @param[in] void
*
* @returns IV_ARCH_T
* ARCH_ARM_V8_NEON when ARMV8 is defined; otherwise ARCH_GENERIC when
* DISABLE_NEON is defined; otherwise ARCH_ARM_A9Q
*
* @remarks none
*
*******************************************************************************
*/
IV_ARCH_T isvce_default_arch(void)
{
    IV_ARCH_T e_default_arch;

#if defined(ARMV8)
    e_default_arch = ARCH_ARM_V8_NEON;
#elif defined(DISABLE_NEON)
    e_default_arch = ARCH_GENERIC;
#else
    e_default_arch = ARCH_ARM_A9Q;
#endif

    return e_default_arch;
}

View file

@ -0,0 +1,270 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_function_selector_a9q.c
*
* @brief
* Contains functions to initialize function pointers of codec context
*
* @author
* Ittiam
*
* @par List of Functions:
* - isvce_init_function_ptr_neon_a9q
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System Include files */
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* User Include files */
#include "ih264_typedefs.h"
#include "iv2.h"
#include "ive2.h"
#include "isvc_defs.h"
#include "ih264_size_defs.h"
#include "isvce_defs.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
#include "ime_defs.h"
#include "ime_structs.h"
#include "ih264_error.h"
#include "isvc_structs.h"
#include "isvc_trans_quant_itrans_iquant.h"
#include "ih264_inter_pred_filters.h"
#include "ih264_mem_fns.h"
#include "isvc_mem_fns.h"
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "isvc_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "isvce_rate_control.h"
#include "isvce_cabac_structs.h"
#include "isvce_structs.h"
#include "ih264e_platform_macros.h"
#include "isvce_cabac.h"
#include "isvce_core_coding.h"
#include "ih264_cavlc_tables.h"
#include "isvce_cavlc.h"
#include "ih264e_intra_modes_eval.h"
#include "ih264e_fmt_conv.h"
#include "ih264e_half_pel.h"
/**
*******************************************************************************
*
* @brief Install the 'a9q' NEON function pointers into the codec context
*
* @par Description: Assigns the `_a9q`, `_a9` and `_neon` suffixed leaf
* functions to the intra prediction, transform, deblocking, padding, inter
* prediction, SAD, memory and intra mode evaluation function pointers of the
* codec context, and to the motion estimation function pointers of each of
* its MAX_PROCESS_CTXT process contexts. A few entries are assigned functions
* that carry no SIMD suffix (8x8 transforms, core coding, MB syntax writers,
* bottom padding). Entries that are not assigned here are left untouched.
*
* @param[in] ps_codec
* Codec context pointer
*
* @returns none
*
* @remarks isvce_init_function_ptr() calls isvce_init_function_ptr_generic()
* on the same context before calling this function.
*
*******************************************************************************
*/
void isvce_init_function_ptr_neon_a9q(isvce_codec_t *ps_codec)
{
WORD32 i = 0;
/* curr proc ctxt */
isvce_process_ctxt_t *ps_proc = NULL;
isvce_me_ctxt_t *ps_me_ctxt = NULL;
/* Shortcuts into the ISA dependent function tables of the codec context */
isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
/* Init function pointers for intra pred leaf level functions luma
* Intra 16x16 */
ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q;
ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q;
ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q;
ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q;
/* Init function pointers for intra pred leaf level functions luma
* Intra 4x4 */
ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q;
ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q;
ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q;
ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q;
/* Init function pointers for intra pred leaf level functions luma
* Intra 8x8 (index 1 is not assigned here) */
ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q;
ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q;
ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q;
/* Init function pointers for intra pred leaf level functions chroma
* Intra 8x8 */
ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q;
ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q;
ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q;
ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q;
/* Init forward transform fn ptr; both 8x8 entries get the same function,
* which carries no SIMD suffix */
ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8;
ps_enc_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8;
ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4_neon;
ps_enc_loop_fxns->apf_resi_trans_quant_4x4[1] =
isvc_resi_trans_quant_4x4_with_residual_sub_neon;
ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4_neon;
ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] =
isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;
/* Init inverse transform fn ptr; in each 4x4 table index 0 gets the
* 'with_res_output' variant, index 1 the 'with_res_accumulate' variant and
* index 2 the plain variant. All three 8x8 entries get the same function,
* which carries no SIMD suffix */
ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] =
isvc_iquant_itrans_recon_4x4_with_res_output_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] =
isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] =
isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] =
isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] =
isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] =
isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] =
isvc_iquant_itrans_recon_chroma_4x4_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9;
ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9;
/* Init fn ptr luma core coding (index 2 is not assigned here) */
ps_enc_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16;
ps_enc_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4;
ps_enc_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16;
/* Init fn ptr chroma core coding */
ps_enc_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8;
ps_enc_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;
/* Init fn ptr luma deblocking */
ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9;
ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9;
ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9;
ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9;
/* Init fn ptr chroma deblocking */
ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9;
ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9;
ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9;
ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9;
/* write mb syntax layer ([CABAC][BSLICE] is not assigned here) */
ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc;
ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc;
ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc;
ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac;
ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac;
/* Padding Functions; bottom padding is assigned a function without an
* _a9q suffix */
ps_codec->pf_pad_top = ih264_pad_top_a9q;
ps_codec->pf_pad_bottom = ih264_pad_bottom;
ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q;
ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q;
ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q;
ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q;
/* Inter pred leaf level functions */
ps_inter_pred_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q;
ps_inter_pred_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q;
ps_inter_pred_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q;
ps_inter_pred_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q;
ps_inter_pred_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q;
/* SAD functions held directly in the codec context */
ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
/* memory handling operations */
ps_mem_fxns->pf_mem_cpy = ih264_memcpy_a9q;
ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q;
ps_mem_fxns->pf_mem_set = ih264_memset_a9q;
ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8_a9q;
/* SAD functions of the motion estimation context, set identically for every
* process context */
for(i = 0; i < (MAX_PROCESS_CTXT); i++)
{
ps_proc = &ps_codec->as_process[i];
ps_me_ctxt = &ps_proc->s_me_ctxt;
ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q;
ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q;
ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q;
ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q;
ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q;
}
/* intra mode eval -encoder level function */
ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q;
ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q;
ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q;
/* NOTE(review): unlike isvce_init_function_ptr_neon_av8, the colour space
* conversion and half pel generation function pointers are not assigned
* here - confirm that leaving them untouched is intended */
}

View file

@ -0,0 +1,278 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_function_selector_av8.c
*
* @brief
* Contains functions to initialize function pointers of codec context
*
* @author
* Ittiam
*
* @par List of Functions:
* - isvce_init_function_ptr_neon_av8
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System Include files */
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* User Include files */
#include "ih264_typedefs.h"
#include "iv2.h"
#include "ive2.h"
#include "isvc_defs.h"
#include "ih264_size_defs.h"
#include "isvce_defs.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
#include "ime_defs.h"
#include "ime_structs.h"
#include "ih264_error.h"
#include "isvc_structs.h"
#include "isvc_trans_quant_itrans_iquant.h"
#include "ih264_inter_pred_filters.h"
#include "ih264_mem_fns.h"
#include "isvc_mem_fns.h"
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "isvc_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "isvce_rate_control.h"
#include "isvce_cabac_structs.h"
#include "isvce_structs.h"
#include "ih264e_platform_macros.h"
#include "isvce_cabac.h"
#include "isvce_core_coding.h"
#include "ih264_cavlc_tables.h"
#include "isvce_cavlc.h"
#include "ih264e_intra_modes_eval.h"
#include "ih264e_fmt_conv.h"
#include "ih264e_half_pel.h"
/**
*******************************************************************************
*
* @brief Install the 'av8' NEON function pointers into the codec context
*
* @par Description: Assigns the `_av8` and `_neon` suffixed leaf functions to
* the intra prediction, transform, deblocking, padding, inter prediction, SAD,
* memory, intra mode evaluation and half pel function pointers of the codec
* context, and to the motion estimation function pointers of each of its
* MAX_PROCESS_CTXT process contexts. Some entries are assigned functions that
* carry no SIMD suffix (8x8 transforms, core coding, MB syntax writers, bottom
* padding, luma bilinear inter prediction, intra 4x4 mode evaluation, colour
* space conversion). Entries that are not assigned here are left untouched.
*
* @param[in] ps_codec
* Codec context pointer
*
* @returns none
*
* @remarks isvce_init_function_ptr() calls isvce_init_function_ptr_generic()
* on the same context before calling this function.
*
*******************************************************************************
*/
void isvce_init_function_ptr_neon_av8(isvce_codec_t *ps_codec)
{
WORD32 i = 0;
/* curr proc ctxt */
isvce_process_ctxt_t *ps_proc = NULL;
isvce_me_ctxt_t *ps_me_ctxt = NULL;
/* Shortcuts into the ISA dependent function tables of the codec context */
isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
/* Init function pointers for intra pred leaf level functions luma
* Intra 16x16 */
ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_av8;
ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_av8;
ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_av8;
ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_av8;
/* Init function pointers for intra pred leaf level functions luma
* Intra 4x4 */
ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_av8;
ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_av8;
ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_av8;
ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_av8;
ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_av8;
ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_av8;
ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_av8;
ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_av8;
ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_av8;
/* Init function pointers for intra pred leaf level functions luma
* Intra 8x8 (index 1 is not assigned here) */
ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_av8;
ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_av8;
ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_av8;
ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_av8;
ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_av8;
ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_av8;
ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_av8;
ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_av8;
/* Init function pointers for intra pred leaf level functions chroma
* Intra 8x8 */
ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_av8;
ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_av8;
ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_av8;
ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_av8;
/* Init forward transform fn ptr; both 8x8 entries get the same function,
* which carries no SIMD suffix */
ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8;
ps_enc_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8;
ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4_neon;
ps_enc_loop_fxns->apf_resi_trans_quant_4x4[1] =
isvc_resi_trans_quant_4x4_with_residual_sub_neon;
ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4_neon;
ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] =
isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;
/* Init inverse transform fn ptr; in each 4x4 table index 0 gets the
* 'with_res_output' variant, index 1 the 'with_res_accumulate' variant and
* index 2 the plain variant. All three 8x8 entries get the same function,
* which carries no SIMD suffix */
ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] =
isvc_iquant_itrans_recon_4x4_with_res_output_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] =
isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] =
isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] =
isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] =
isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] =
isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] =
isvc_iquant_itrans_recon_chroma_4x4_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_av8;
ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_av8;
/* Init fn ptr luma core coding (index 2 is not assigned here) */
ps_enc_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16;
ps_enc_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4;
ps_enc_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16;
/* Init fn ptr chroma core coding */
ps_enc_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8;
ps_enc_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;
/* Init fn ptr luma deblocking */
ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_av8;
ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_av8;
ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_av8;
ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_av8;
/* Init fn ptr chroma deblocking */
ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_av8;
ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_av8;
ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_av8;
ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8;
/* write mb syntax layer ([CABAC][BSLICE] is not assigned here) */
ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc;
ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc;
ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc;
ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac;
ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac;
/* Padding Functions; bottom padding is assigned a function without an
* _av8 suffix */
ps_codec->pf_pad_top = ih264_pad_top_av8;
ps_codec->pf_pad_bottom = ih264_pad_bottom;
ps_codec->pf_pad_left_luma = ih264_pad_left_luma_av8;
ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_av8;
ps_codec->pf_pad_right_luma = ih264_pad_right_luma_av8;
ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_av8;
/* Inter pred leaf level functions; luma bilinear is assigned a function
* without an _av8 suffix */
ps_inter_pred_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_av8;
ps_inter_pred_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_av8;
ps_inter_pred_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_av8;
ps_inter_pred_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
ps_inter_pred_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma_av8;
/* SAD functions held directly in the codec context */
ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_av8;
/* memory handling operations */
ps_mem_fxns->pf_mem_cpy = ih264_memcpy_av8;
ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_av8;
ps_mem_fxns->pf_mem_set = ih264_memset_av8;
ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8_av8;
/* SAD functions of the motion estimation context, set identically for every
* process context */
for(i = 0; i < (MAX_PROCESS_CTXT); i++)
{
ps_proc = &ps_codec->as_process[i];
ps_me_ctxt = &ps_proc->s_me_ctxt;
ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_av8;
ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_av8;
ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_av8;
ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_av8;
ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_av8;
ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_av8;
}
/* intra mode eval -encoder level function; the 4x4 evaluator is assigned a
* function without an _av8 suffix */
ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_av8;
ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_av8;
ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
/* csc (colour space conversion); both are assigned functions without an
* _av8 suffix */
ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
/* Half pel generation function - encoder level */
ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_av8;
ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_av8;
}

View file

@ -0,0 +1,139 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_platform_macros.h
*
* @brief
* Contains platform specific routines used for codec context initialization
*
* @author
* ittiam
*
* @remarks
* none
*
*******************************************************************************
*/
#ifndef _ISVCE_PLATFORM_MACROS_H_
#define _ISVCE_PLATFORM_MACROS_H_
/*
* This header includes nothing itself: isvce_codec_t and IV_ARCH_T must
* already be declared by the including file before this point.
*/
/**
*******************************************************************************
*
* @brief Install the 'a9q' NEON function pointers into the codec context
*
* @par Description: Assigns a9q/NEON leaf functions to the intra prediction,
* transform, deblocking, padding, inter prediction, SAD, memory and intra
* mode evaluation function pointers of the codec context and of its process
* contexts
*
* @param[in] ps_codec
* Codec context pointer
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_init_function_ptr_neon_a9q(isvce_codec_t *ps_codec);
/**
*******************************************************************************
*
* @brief Install the 'av8' NEON function pointers into the codec context
*
* @par Description: Assigns av8/NEON leaf functions to the intra prediction,
* transform, deblocking, padding, inter prediction, SAD, memory, intra mode
* evaluation and half pel function pointers of the codec context and of its
* process contexts
*
* @param[in] ps_codec
* Codec context pointer
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_init_function_ptr_neon_av8(isvce_codec_t *ps_codec);
/**
*******************************************************************************
*
* @brief Initialize the function pointers of the codec context with the
* generic implementations
*
* @par Description: presumably installs the architecture independent
* implementations (the definition is not part of the selector sources - TODO
* confirm). isvce_init_function_ptr() calls this routine before any
* architecture specific initializer
*
* @param[in] ps_codec
* Codec context pointer
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec);
/**
*******************************************************************************
*
* @brief Initialize the intra/inter/transform/deblk function pointers of
* codec context
*
* @par Description: the current routine initializes the function pointers of
* codec context basing on the architecture in use. It is the entry point that
* dispatches to the initializers declared above
*
* @param[in] pv_codec
* Codec context pointer, passed as void *
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_init_function_ptr(void *pv_codec);
/**
*******************************************************************************
*
* @brief Report the default architecture of this encoder build
*
* @par Description: This routine returns the architecture value that the
* encoder build uses by default
*
* @param[in] void
*
* @returns IV_ARCH_T
* architecture
*
* @remarks none
*
*******************************************************************************
*/
IV_ARCH_T isvce_default_arch(void);
#endif /* _ISVCE_PLATFORM_MACROS_H_ */

View file

@ -0,0 +1,625 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file isvce_svc_rc_utils_neon.c
*
* @brief
* This file contains the NEON SIMD version of the function which computes
* the gradient per pixel value used in Init Qp
*
* @author
* Ittiam
*
* @par List of Functions:
* - isvce_get_gpp_neon()
*
* @remarks
* None
*
*******************************************************************************
*/
#include <arm_neon.h>
#include "ih264_typedefs.h"
#include "ih264_debug.h"
#include "isvc_structs.h"
#include "isvce_rc_utils_private_defs.h"
/**
*******************************************************************************
*
* @brief
*  Computes the gradient per pixel (gpp) value of a frame using NEON
*
* @par Description:
*  For every sample of a plane, except those in the last row and in the last
*  column, the sum |cur - bottom| + |cur - right| is accumulated.
*  Component 0 is processed as luma. Component 1 is processed as interleaved
*  chroma: its rows are u4_width bytes wide, there are (u4_height >> 1) of
*  them, and the right neighbour of a sample lies 2 bytes further. The bytes
*  at even offsets are accumulated into gpp_u and those at odd offsets into
*  gpp_v. Each sum is normalised by the sample count of its plane and the
*  three results are combined as
*  ((WT_LUMA_GPP * gpp_y) + gpp_u + gpp_v) / WT_TOTAL_GPP
*
* @param[in] ps_input_buf
*  pointer to yuv buffer properties. u4_width must be a non-zero multiple
*  of 8. Rows are read 8 bytes at a time and no byte beyond u4_width is read
*  in any row, hence the input is not required to be padded
*
* @returns
*  calculated gpp value
*
* @remarks
*  All loop bounds are written in the form 'idx + step < limit'. The limits
*  are unsigned, so 'limit - step' would wrap around for planes that have
*  fewer rows than the unroll factor and the unrolled loops would then read
*  outside the plane
*
*******************************************************************************
*/
DOUBLE isvce_get_gpp_neon(yuv_buf_props_t *ps_input_buf)
{
    UWORD8 *pu1_input_buf;
    UWORD32 i, j, k;
    UWORD32 u4_width, u4_height, u4_chroma_height;
    /* Signed, as indicated by the 'i4_' prefix of the field it is read from */
    WORD32 i4_input_stride;
    DOUBLE d_gpp_y, d_gpp_u, d_gpp_v, d_gpp;

    uint8x8_t reg_8x8_src_r0, reg_8x8_src_r1, reg_8x8_src_r2, reg_8x8_src_r3, reg_8x8_src_r4,
        reg_8x8_src_r5, reg_8x8_src_r6, reg_8x8_src_r7, reg_8x8_src_r8;
    uint8x8_t reg_8x8_src_right_r0, reg_8x8_src_right_r1, reg_8x8_src_right_r2,
        reg_8x8_src_right_r3, reg_8x8_src_right_r4, reg_8x8_src_right_r5, reg_8x8_src_right_r6,
        reg_8x8_src_right_r7;
    uint16x8_t reg_16x8_abs_diff_y, reg_16x8_abs_diff_uv;
    uint64x2_t reg_64x2_gpp_y, reg_64x2_gpp_uv;

    /* Table lookup indices that gather the even bytes (u) of an interleaved */
    /* row into lanes 0-3 and the odd bytes (v) into lanes 4-7 */
    uint8x8_t reg_8x8_shuffle = {0, 2, 4, 6, 1, 3, 5, 7};
    /* Discards the last luma column of a row */
    uint16x8_t reg_16x8_and_mask_y = {0xffff, 0xffff, 0xffff, 0xffff,
                                      0xffff, 0xffff, 0xffff, 0x0000};
    /* Discards the last u and the last v column of a deinterleaved row */
    uint16x8_t reg_16x8_and_mask_uv = {0xffff, 0xffff, 0xffff, 0x0000,
                                       0xffff, 0xffff, 0xffff, 0x0000};
    uint32x4_t reg_32x4_abs_diff_hadd_y = vdupq_n_u32(0);
    uint32x4_t reg_32x4_abs_diff_hadd_uv = vdupq_n_u32(0);

    pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[0].pv_data;
    i4_input_stride = ps_input_buf->as_component_bufs[0].i4_data_stride;
    u4_width = ps_input_buf->u4_width;
    u4_height = ps_input_buf->u4_height;
    u4_chroma_height = u4_height >> 1;

    ASSERT((u4_width % 8) == 0);

    /***********************************************************/
    /* For Luma -                                              */
    /* This code block calculates gpp value for luma by adding */
    /* the absolute difference between the current pixel and   */
    /* its immediate right pixel with the absolute difference  */
    /* between the current pixel and its immediate bottom      */
    /* pixel and accumulating for every pixel in the frame.    */
    /***********************************************************/
    /* The right column and the bottom row are used only as neighbours; the last */
    /* row and the last column are ignored for gradient computation. The strict  */
    /* 'idx + step < limit' checks below keep the last row and the last 8 column */
    /* block out of the unrolled loops. */
    /* Note that input is not required to be padded */
    for(i = 0; i + 8 < u4_height; i += 8)
    {
        for(j = 0; j + 8 < u4_width; j += 8)
        {
            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
            reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
            reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
            reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
            reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j);
            reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j);
            reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j);
            reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j);

            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 1);
            reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 1);
            reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 1);
            reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 1);
            reg_8x8_src_right_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j + 1);
            reg_8x8_src_right_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j + 1);
            reg_8x8_src_right_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j + 1);
            reg_8x8_src_right_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j + 1);

            /* Vertical gradients */
            reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2);
            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3);
            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4);
            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_r5);
            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_r6);
            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_r7);
            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_r8);

            /* Horizontal gradients */
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_right_r4);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_right_r5);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_right_r6);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_right_r7);

            reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);
        }

        /************************************************************/
        /* Remaining width -                                        */
        /* Since Last pixel is not getting processed, remaining 7   */
        /* pixels are getting processed separately by performing    */
        /* and operations with reg_16x8_and_mask_y. The right       */
        /* neighbours are obtained by rotating the row itself so    */
        /* that nothing beyond the row is read.                     */
        /************************************************************/
        ASSERT((u4_width - j) == 8);

        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
        reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
        reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
        reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
        reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j);
        reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j);
        reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j);
        reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j);

        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 1);
        reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 1);
        reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 1);
        reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 1);
        reg_8x8_src_right_r4 = vext_u8(reg_8x8_src_r4, reg_8x8_src_r4, 1);
        reg_8x8_src_right_r5 = vext_u8(reg_8x8_src_r5, reg_8x8_src_r5, 1);
        reg_8x8_src_right_r6 = vext_u8(reg_8x8_src_r6, reg_8x8_src_r6, 1);
        reg_8x8_src_right_r7 = vext_u8(reg_8x8_src_r7, reg_8x8_src_r7, 1);

        reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_r5);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_r6);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_r7);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_r8);

        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_right_r4);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_right_r5);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_right_r6);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_right_r7);

        reg_16x8_abs_diff_y = vandq_u16(reg_16x8_abs_diff_y, reg_16x8_and_mask_y);
        reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);

        pu1_input_buf += (i4_input_stride * 8);
    }

    /* At most 8 rows are left at this point. */
    /* If more than 4 rows are left, process 4 of them in one go */
    for(k = i; k + 4 < u4_height; k += 4, i += 4)
    {
        for(j = 0; j + 8 < u4_width; j += 8)
        {
            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
            reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
            reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
            reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);

            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 1);
            reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 1);
            reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 1);
            reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 1);

            reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2);
            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3);
            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4);

            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3);

            reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);
        }

        /************************************************************/
        /* Remaining width -                                        */
        /* Since Last pixel is not getting processed, remaining 7   */
        /* pixels are getting processed separately by performing    */
        /* and operations with reg_16x8_and_mask_y                  */
        /************************************************************/
        ASSERT((u4_width - j) == 8);

        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
        reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
        reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
        reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);

        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 1);
        reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 1);
        reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 1);
        reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 1);

        reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4);

        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3);

        reg_16x8_abs_diff_y = vandq_u16(reg_16x8_abs_diff_y, reg_16x8_and_mask_y);
        reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);

        pu1_input_buf += (i4_input_stride * 4);
    }

    /* Remaining rows are processed one at a time. The last row of the plane */
    /* is only ever used as the bottom neighbour */
    for(k = i; k + 1 < u4_height; k++)
    {
        for(j = 0; j + 8 < u4_width; j += 8)
        {
            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 1);

            reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
            reg_16x8_abs_diff_y =
                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);

            reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);
        }

        /************************************************************/
        /* Remaining width -                                        */
        /* Since Last pixel is not getting processed, remaining 7   */
        /* pixels are getting processed separately by performing    */
        /* and operations with reg_16x8_and_mask_y                  */
        /************************************************************/
        ASSERT((u4_width - j) == 8);

        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 1);

        reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);

        reg_16x8_abs_diff_y = vandq_u16(reg_16x8_abs_diff_y, reg_16x8_and_mask_y);
        reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);

        pu1_input_buf += i4_input_stride;
    }

    /* Pairwise add reg_32x4_abs_diff_hadd_y to get final gpp value */
    reg_64x2_gpp_y = vpaddlq_u32(reg_32x4_abs_diff_hadd_y);
    d_gpp_y = vgetq_lane_u64(reg_64x2_gpp_y, 0);
    d_gpp_y += vgetq_lane_u64(reg_64x2_gpp_y, 1);

    pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[1].pv_data;
    i4_input_stride = ps_input_buf->as_component_bufs[1].i4_data_stride;

    /***************************************************************/
    /* For Chroma -                                                */
    /* This code block first deinterleaves the Cb and Cr values,   */
    /* calculates gpp value for both Cb and Cr separately by       */
    /* adding the absolute difference between the current pixel    */
    /* and its immediate right pixel with the absolute             */
    /* difference between the current pixel and its immediate      */
    /* bottom pixel and accumulating for every pixel in the frame. */
    /***************************************************************/
    for(i = 0; i + 8 < u4_chroma_height; i += 8)
    {
        for(j = 0; j + 8 < u4_width; j += 8)
        {
            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
            reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
            reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
            reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
            reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j);
            reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j);
            reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j);
            reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j);

            /* Samples are interleaved, so the right neighbour is 2 bytes away */
            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 2);
            reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 2);
            reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 2);
            reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 2);
            reg_8x8_src_right_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j + 2);
            reg_8x8_src_right_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j + 2);
            reg_8x8_src_right_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j + 2);
            reg_8x8_src_right_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j + 2);

            /* separating u and v */
            reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
            reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
            reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle);
            reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle);
            reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle);
            reg_8x8_src_r5 = vtbl1_u8(reg_8x8_src_r5, reg_8x8_shuffle);
            reg_8x8_src_r6 = vtbl1_u8(reg_8x8_src_r6, reg_8x8_shuffle);
            reg_8x8_src_r7 = vtbl1_u8(reg_8x8_src_r7, reg_8x8_shuffle);
            reg_8x8_src_r8 = vtbl1_u8(reg_8x8_src_r8, reg_8x8_shuffle);
            reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);
            reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle);
            reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle);
            reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle);
            reg_8x8_src_right_r4 = vtbl1_u8(reg_8x8_src_right_r4, reg_8x8_shuffle);
            reg_8x8_src_right_r5 = vtbl1_u8(reg_8x8_src_right_r5, reg_8x8_shuffle);
            reg_8x8_src_right_r6 = vtbl1_u8(reg_8x8_src_right_r6, reg_8x8_shuffle);
            reg_8x8_src_right_r7 = vtbl1_u8(reg_8x8_src_right_r7, reg_8x8_shuffle);

            reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2);
            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3);
            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4);
            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_r5);
            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_r6);
            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_r7);
            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_r8);

            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_right_r4);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_right_r5);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_right_r6);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_right_r7);

            reg_32x4_abs_diff_hadd_uv =
                vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);
        }

        /************************************************************/
        /* Remaining width -                                        */
        /* Since Last pixel is not getting processed, remaining 6   */
        /* pixels are getting processed separately by performing    */
        /* and operations with reg_16x8_and_mask_uv                 */
        /************************************************************/
        ASSERT((u4_width - j) == 8);

        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
        reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
        reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
        reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
        reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j);
        reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j);
        reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j);
        reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j);

        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 2);
        reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 2);
        reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 2);
        reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 2);
        reg_8x8_src_right_r4 = vext_u8(reg_8x8_src_r4, reg_8x8_src_r4, 2);
        reg_8x8_src_right_r5 = vext_u8(reg_8x8_src_r5, reg_8x8_src_r5, 2);
        reg_8x8_src_right_r6 = vext_u8(reg_8x8_src_r6, reg_8x8_src_r6, 2);
        reg_8x8_src_right_r7 = vext_u8(reg_8x8_src_r7, reg_8x8_src_r7, 2);

        /* separating u and v */
        reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
        reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
        reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle);
        reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle);
        reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle);
        reg_8x8_src_r5 = vtbl1_u8(reg_8x8_src_r5, reg_8x8_shuffle);
        reg_8x8_src_r6 = vtbl1_u8(reg_8x8_src_r6, reg_8x8_shuffle);
        reg_8x8_src_r7 = vtbl1_u8(reg_8x8_src_r7, reg_8x8_shuffle);
        reg_8x8_src_r8 = vtbl1_u8(reg_8x8_src_r8, reg_8x8_shuffle);
        reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);
        reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle);
        reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle);
        reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle);
        reg_8x8_src_right_r4 = vtbl1_u8(reg_8x8_src_right_r4, reg_8x8_shuffle);
        reg_8x8_src_right_r5 = vtbl1_u8(reg_8x8_src_right_r5, reg_8x8_shuffle);
        reg_8x8_src_right_r6 = vtbl1_u8(reg_8x8_src_right_r6, reg_8x8_shuffle);
        reg_8x8_src_right_r7 = vtbl1_u8(reg_8x8_src_right_r7, reg_8x8_shuffle);

        reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_r5);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_r6);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_r7);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_r8);

        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_right_r4);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_right_r5);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_right_r6);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_right_r7);

        reg_16x8_abs_diff_uv = vandq_u16(reg_16x8_abs_diff_uv, reg_16x8_and_mask_uv);
        reg_32x4_abs_diff_hadd_uv = vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);

        pu1_input_buf += (i4_input_stride * 8);
    }

    /* At most 8 rows are left at this point. */
    /* If more than 4 rows are left, process 4 of them in one go */
    for(k = i; k + 4 < u4_chroma_height; k += 4, i += 4)
    {
        for(j = 0; j + 8 < u4_width; j += 8)
        {
            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
            reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
            reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
            reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);

            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 2);
            reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 2);
            reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 2);
            reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 2);

            /* separating u and v */
            reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
            reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
            reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle);
            reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle);
            reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle);
            reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);
            reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle);
            reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle);
            reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle);

            reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2);
            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3);
            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4);

            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3);

            reg_32x4_abs_diff_hadd_uv =
                vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);
        }

        /************************************************************/
        /* Remaining width -                                        */
        /* Since Last pixel is not getting processed, remaining 6   */
        /* pixels are getting processed separately by performing    */
        /* and operations with reg_16x8_and_mask_uv                 */
        /************************************************************/
        ASSERT((u4_width - j) == 8);

        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
        reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
        reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
        reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);

        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 2);
        reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 2);
        reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 2);
        reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 2);

        /* separating u and v */
        reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
        reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
        reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle);
        reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle);
        reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle);
        reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);
        reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle);
        reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle);
        reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle);

        reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4);

        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3);

        reg_16x8_abs_diff_uv = vandq_u16(reg_16x8_abs_diff_uv, reg_16x8_and_mask_uv);
        reg_32x4_abs_diff_hadd_uv = vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);

        pu1_input_buf += (i4_input_stride * 4);
    }

    /* Remaining rows are processed one at a time. The last row of the plane */
    /* is only ever used as the bottom neighbour */
    for(k = i; k + 1 < u4_chroma_height; k++)
    {
        for(j = 0; j + 8 < u4_width; j += 8)
        {
            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 2);

            /* separating u and v */
            reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
            reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
            reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);

            reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
            reg_16x8_abs_diff_uv =
                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);

            reg_32x4_abs_diff_hadd_uv =
                vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);
        }

        /************************************************************/
        /* Remaining width -                                        */
        /* Since Last pixel is not getting processed, remaining 6   */
        /* pixels are getting processed separately by performing    */
        /* and operations with reg_16x8_and_mask_uv                 */
        /************************************************************/
        ASSERT((u4_width - j) == 8);

        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 2);

        /* separating u and v */
        reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
        reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
        reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);

        reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);

        reg_16x8_abs_diff_uv = vandq_u16(reg_16x8_abs_diff_uv, reg_16x8_and_mask_uv);
        reg_32x4_abs_diff_hadd_uv = vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);

        pu1_input_buf += i4_input_stride;
    }

    /* Pairwise add reg_32x4_abs_diff_hadd_uv to get final gpp_u and gpp_v value. */
    /* After the shuffle and the pairwise accumulation, 32 bit lanes 0-1 hold u   */
    /* sums and lanes 2-3 hold v sums */
    reg_64x2_gpp_uv = vpaddlq_u32(reg_32x4_abs_diff_hadd_uv);
    d_gpp_u = vgetq_lane_u64(reg_64x2_gpp_uv, 0);
    d_gpp_v = vgetq_lane_u64(reg_64x2_gpp_uv, 1);

    /* Normalise each sum by the number of samples in its plane */
    d_gpp_y /= (u4_width * u4_height);
    d_gpp_u /= ((u4_width / 2) * (u4_height / 2));
    d_gpp_v /= ((u4_width / 2) * (u4_height / 2));

    d_gpp = (DOUBLE) ((WT_LUMA_GPP * d_gpp_y) + d_gpp_u + d_gpp_v) / WT_TOTAL_GPP;

    return d_gpp;
}

View file

@ -0,0 +1,666 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
*
* @file
* isvce_svc_residual_pred_neon.c
*
* @brief
* Contains functions
* used for SVC residual
* prediction
*
*******************************************************************************
*/
#include <arm_neon.h>
#include "ih264_typedefs.h"
#include "ih264_macros.h"
#include "ih264_size_defs.h"
#include "isvc_macros.h"
#include "isvc_structs.h"
/**
*******************************************************************************
*
* @brief NEON implementation of the 2x luma residual sampler used for SVC
* residual prediction
*
* @par Description: Upsamples 16-bit residual samples by a factor of two in
* both directions. Each direction uses the weights (3, 1) / (1, 3) between
* neighbouring samples, while the first and last output sample of a line is
* the corresponding input sample scaled on its own. The horizontal pass keeps
* the results scaled by 4; the vertical pass applies a rounded right shift of
* 4 to interpolated rows and of 2 to the first and last row.
* Two paths exist:
* - u1_ref_tx_size non-zero and u4_ref_nnz non-zero: 8 input rows of 8
*   samples are expanded into 16 output rows of 16 samples in one go.
* - otherwise: four blocks are visited; a block whose current LSB of
*   u4_ref_nnz is set expands 4 input rows of 4 samples into 8 output rows of
*   8 samples, other blocks leave the output untouched.
*
* @param[in] ps_ref_array_positions
* Unused
*
* @param[in] ps_ref_array_phases
* Unused
*
* @param[in] ps_inp
* Input residual; pv_data is read as WORD16 and i4_data_stride is applied in
* WORD16 units. Reading starts one row and one column into the buffer
*
* @param[out] ps_out
* Output residual; pv_data is written as WORD16 and i4_data_stride is applied
* in WORD16 units
*
* @param[in] ps_scratch
* Scratch memory; pv_data holds the horizontally interpolated samples (WORD32
* elements in the first path, WORD16 elements in the second path)
*
* @param[in] u4_ref_nnz
* Coded-block flags of the reference layer. In the second path the bits
* tested for blocks 0..3 are bits 0, 1, 4 and 5 of the incoming value
*
* @param[in] u1_ref_tx_size
* Non-zero selects the first path (presumably 8x8 transform in the reference
* layer - TODO confirm against caller)
*
* @returns none
*
* @remarks NOTE(review): the last vector store of every scratch row also
* writes the first element of the following row; for the final row this is
* one element past the 8x16 (first path) or 4x8 (second path) area, so the
* scratch buffer has to be at least one element larger than that area.
*
*******************************************************************************
*/
void isvce_luma_residual_sampler_2x_neon(coordinates_t *ps_ref_array_positions,
coordinates_t *ps_ref_array_phases,
buffer_container_t *ps_inp, buffer_container_t *ps_out,
buffer_container_t *ps_scratch, UWORD32 u4_ref_nnz,
UWORD8 u1_ref_tx_size)
{
WORD16 *pi2_inp_data = (WORD16 *) ps_inp->pv_data;
WORD16 *pi2_out_res = (WORD16 *) ps_out->pv_data;
WORD32 i4_inp_data_stride = ps_inp->i4_data_stride;
WORD32 i4_out_res_stride = ps_out->i4_data_stride;
WORD16 *pi2_refarray_buffer = (WORD16 *) ps_scratch->pv_data;
WORD32 i4_blk_ctr;
UNUSED(ps_ref_array_positions);
UNUSED(ps_ref_array_phases);
/* For 2x scaling, offsets always point to TL pixel outside MB */
/* Hence, refTransBlkIdc will be different and since phase */
/* for first refArray pos for horiz filtering samples > 8, */
/* first row and first column from the refArray is never used */
pi2_inp_data += 1 + i4_inp_data_stride;
/* First path: the whole block is processed at once */
if((u1_ref_tx_size) && (0 != u4_ref_nnz))
{
WORD16 *pi2_ref_data_byte;
WORD32 *pi4_ref_array;
WORD32 i4_i, i4_j;
/* Registers used by the horizontal interpolation (two rows per pass) */
int16x8_t i2_coeff_add_16x8_r0;
int16x8_t i2_coeff_16x8_r0_0, i2_coeff_16x8_r0_1;
int16x8_t i2_coeff_16x8_sl_r0_0, i2_coeff_16x8_sl_r0_1;
int16x8_t result_16x8_r0_0, result_16x8_r0_1;
int16x8_t i2_coeff_add_16x8_r1;
int16x8_t i2_coeff_16x8_r1_0, i2_coeff_16x8_r1_1;
int16x8_t i2_coeff_16x8_sl_r1_0, i2_coeff_16x8_sl_r1_1;
int16x8_t result_16x8_r1_0, result_16x8_r1_1;
int16x8x2_t final_result_16x8x2_r0, final_result_16x8x2_r1;
pi2_ref_data_byte = pi2_inp_data;
/* ----------- Horizontal Interpolation ---------------- */
/* The scratch buffer is used as rows of 16 WORD32 samples */
pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i += 2)
{
/* a[0..7] and a[1..8] of the current row and of the row below it */
i2_coeff_16x8_r0_0 = vld1q_s16(pi2_ref_data_byte);
i2_coeff_16x8_r0_1 = vld1q_s16((pi2_ref_data_byte + 1));
i2_coeff_16x8_r1_0 = vld1q_s16(pi2_ref_data_byte + i4_inp_data_stride);
i2_coeff_16x8_r1_1 = vld1q_s16((pi2_ref_data_byte + i4_inp_data_stride + 1));
/* 2 * a[i] + (a[i] + a[i + 1]) = 3 * a[i] + a[i + 1], and the mirrored */
/* 3 * a[i + 1] + a[i]; same computation for the second row */
i2_coeff_add_16x8_r0 = vaddq_s16(i2_coeff_16x8_r0_0, i2_coeff_16x8_r0_1);
i2_coeff_16x8_sl_r0_0 = vshlq_n_s16(i2_coeff_16x8_r0_0, 1);
i2_coeff_16x8_sl_r0_1 = vshlq_n_s16(i2_coeff_16x8_r0_1, 1);
i2_coeff_add_16x8_r1 = vaddq_s16(i2_coeff_16x8_r1_0, i2_coeff_16x8_r1_1);
i2_coeff_16x8_sl_r1_0 = vshlq_n_s16(i2_coeff_16x8_r1_0, 1);
i2_coeff_16x8_sl_r1_1 = vshlq_n_s16(i2_coeff_16x8_r1_1, 1);
result_16x8_r0_0 = vaddq_s16(i2_coeff_16x8_sl_r0_0, i2_coeff_add_16x8_r0);
result_16x8_r0_1 = vaddq_s16(i2_coeff_16x8_sl_r0_1, i2_coeff_add_16x8_r0);
result_16x8_r1_0 = vaddq_s16(i2_coeff_16x8_sl_r1_0, i2_coeff_add_16x8_r1);
result_16x8_r1_1 = vaddq_s16(i2_coeff_16x8_sl_r1_1, i2_coeff_add_16x8_r1);
/* Interleave both phases so that the samples are in output order */
final_result_16x8x2_r0 = vzipq_s16(result_16x8_r0_0, result_16x8_r0_1);
final_result_16x8x2_r1 = vzipq_s16(result_16x8_r1_0, result_16x8_r1_1);
/* Widen to 32 bit and store at positions 1..16 of the scratch row; */
/* position 15 is replaced below and position 16 belongs to the next row */
vst1q_s32(pi4_ref_array + 1, vmovl_s16(vget_low_s16(final_result_16x8x2_r0.val[0])));
vst1q_s32(pi4_ref_array + 5, vmovl_s16(vget_high_s16(final_result_16x8x2_r0.val[0])));
vst1q_s32(pi4_ref_array + 9, vmovl_s16(vget_low_s16(final_result_16x8x2_r0.val[1])));
vst1q_s32(pi4_ref_array + 13, vmovl_s16(vget_high_s16(final_result_16x8x2_r0.val[1])));
/* First and last sample of the row are the edge inputs scaled by 4 */
/* NOTE(review): '<<' is applied to a signed sample here */
pi4_ref_array[0] = pi2_ref_data_byte[0] << 2;
pi4_ref_array[15] = pi2_ref_data_byte[7] << 2;
pi4_ref_array += 16;
pi2_ref_data_byte += i4_inp_data_stride;
/* Second row of the pair, same store pattern as above */
vst1q_s32(pi4_ref_array + 1, vmovl_s16(vget_low_s16(final_result_16x8x2_r1.val[0])));
vst1q_s32(pi4_ref_array + 5, vmovl_s16(vget_high_s16(final_result_16x8x2_r1.val[0])));
vst1q_s32(pi4_ref_array + 9, vmovl_s16(vget_low_s16(final_result_16x8x2_r1.val[1])));
vst1q_s32(pi4_ref_array + 13, vmovl_s16(vget_high_s16(final_result_16x8x2_r1.val[1])));
pi4_ref_array[0] = pi2_ref_data_byte[0] << 2;
pi4_ref_array[15] = pi2_ref_data_byte[7] << 2;
pi4_ref_array += 16;
/* vertical loop updates */
pi2_ref_data_byte = pi2_inp_data + ((i4_i + 2) * i4_inp_data_stride);
}
/* ----------- Vertical Interpolation ---------------- */
pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
{
WORD32 *pi4_ref_array_temp;
WORD16 *pi2_out;
int32x4_t i4_horz_samp_32x4_r1_1, i4_horz_samp_32x4_r1_2, i4_horz_samp_32x4_r1_3,
i4_horz_samp_32x4_r1_4;
int32x4_t i4_horz_samp_32x4_r2_1, i4_horz_samp_32x4_r2_2, i4_horz_samp_32x4_r2_3,
i4_horz_samp_32x4_r2_4;
int32x4_t i4_horz_res_32x4_r1_1, i4_horz_res_32x4_r1_2, i4_horz_res_32x4_r1_3,
i4_horz_res_32x4_r1_4;
int32x4_t i4_horz_res_32x4_r2_1, i4_horz_res_32x4_r2_2, i4_horz_res_32x4_r2_3,
i4_horz_res_32x4_r2_4;
int32x4_t i4_horz_res_32x4_r3_1, i4_horz_res_32x4_r3_2, i4_horz_res_32x4_r3_3,
i4_horz_res_32x4_r3_4;
int32x4_t horz_add_32x4_r2_1, horz_add_32x4_r2_2, horz_add_32x4_r2_3,
horz_add_32x4_r2_4;
int16x8_t comb_horz_16x8_1, comb_horz_16x8_2, comb_horz_16x8_3, comb_horz_16x8_4;
pi4_ref_array_temp = pi4_ref_array;
pi2_out = pi2_out_res;
/* Load the first scratch row (16 samples) */
i4_horz_samp_32x4_r1_1 = vld1q_s32(pi4_ref_array_temp);
i4_horz_samp_32x4_r1_2 = vld1q_s32(pi4_ref_array_temp + 4);
i4_horz_samp_32x4_r1_3 = vld1q_s32(pi4_ref_array_temp + 8);
i4_horz_samp_32x4_r1_4 = vld1q_s32(pi4_ref_array_temp + 12);
/* First output row: first scratch row with a rounded shift by 2 */
i4_horz_res_32x4_r1_1 = vrshrq_n_s32(i4_horz_samp_32x4_r1_1, 2);
i4_horz_res_32x4_r1_2 = vrshrq_n_s32(i4_horz_samp_32x4_r1_2, 2);
i4_horz_res_32x4_r1_3 = vrshrq_n_s32(i4_horz_samp_32x4_r1_3, 2);
i4_horz_res_32x4_r1_4 = vrshrq_n_s32(i4_horz_samp_32x4_r1_4, 2);
comb_horz_16x8_1 =
vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_1), vmovn_s32(i4_horz_res_32x4_r1_2));
comb_horz_16x8_2 =
vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_3), vmovn_s32(i4_horz_res_32x4_r1_4));
vst1q_s16(pi2_out, comb_horz_16x8_1);
vst1q_s16(pi2_out + 8, comb_horz_16x8_2);
pi2_out += i4_out_res_stride;
/* Seven passes; each pass reads the next scratch row and emits the */
/* two output rows that lie between the previous and the new row */
for(i4_j = 0; i4_j < 14; i4_j += 2)
{
/* Advance by one scratch row (rows were written 16 samples apart) */
pi4_ref_array_temp += MB_SIZE;
i4_horz_samp_32x4_r2_1 = vld1q_s32(pi4_ref_array_temp);
i4_horz_samp_32x4_r2_2 = vld1q_s32(pi4_ref_array_temp + 4);
i4_horz_samp_32x4_r2_3 = vld1q_s32(pi4_ref_array_temp + 8);
i4_horz_samp_32x4_r2_4 = vld1q_s32(pi4_ref_array_temp + 12);
/* prev + cur, shared by both output rows */
horz_add_32x4_r2_1 = vaddq_s32(i4_horz_samp_32x4_r1_1, i4_horz_samp_32x4_r2_1);
horz_add_32x4_r2_2 = vaddq_s32(i4_horz_samp_32x4_r1_2, i4_horz_samp_32x4_r2_2);
horz_add_32x4_r2_3 = vaddq_s32(i4_horz_samp_32x4_r1_3, i4_horz_samp_32x4_r2_3);
horz_add_32x4_r2_4 = vaddq_s32(i4_horz_samp_32x4_r1_4, i4_horz_samp_32x4_r2_4);
/* 3 * prev + cur */
i4_horz_res_32x4_r2_1 =
vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_1, 1), horz_add_32x4_r2_1);
i4_horz_res_32x4_r2_2 =
vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_2, 1), horz_add_32x4_r2_2);
i4_horz_res_32x4_r2_3 =
vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_3, 1), horz_add_32x4_r2_3);
i4_horz_res_32x4_r2_4 =
vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_4, 1), horz_add_32x4_r2_4);
/* prev + 3 * cur */
i4_horz_res_32x4_r3_1 =
vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_1, 1), horz_add_32x4_r2_1);
i4_horz_res_32x4_r3_2 =
vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_2, 1), horz_add_32x4_r2_2);
i4_horz_res_32x4_r3_3 =
vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_3, 1), horz_add_32x4_r2_3);
i4_horz_res_32x4_r3_4 =
vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_4, 1), horz_add_32x4_r2_4);
/* Rounded shift by 4 removes the scaling of both filter passes */
i4_horz_res_32x4_r2_1 = vrshrq_n_s32(i4_horz_res_32x4_r2_1, 4);
i4_horz_res_32x4_r2_2 = vrshrq_n_s32(i4_horz_res_32x4_r2_2, 4);
i4_horz_res_32x4_r2_3 = vrshrq_n_s32(i4_horz_res_32x4_r2_3, 4);
i4_horz_res_32x4_r2_4 = vrshrq_n_s32(i4_horz_res_32x4_r2_4, 4);
i4_horz_res_32x4_r3_1 = vrshrq_n_s32(i4_horz_res_32x4_r3_1, 4);
i4_horz_res_32x4_r3_2 = vrshrq_n_s32(i4_horz_res_32x4_r3_2, 4);
i4_horz_res_32x4_r3_3 = vrshrq_n_s32(i4_horz_res_32x4_r3_3, 4);
i4_horz_res_32x4_r3_4 = vrshrq_n_s32(i4_horz_res_32x4_r3_4, 4);
/* Narrow back to 16 bit (vmovn keeps the low half, no saturation) */
comb_horz_16x8_1 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r2_1),
vmovn_s32(i4_horz_res_32x4_r2_2));
comb_horz_16x8_2 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r2_3),
vmovn_s32(i4_horz_res_32x4_r2_4));
comb_horz_16x8_3 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r3_1),
vmovn_s32(i4_horz_res_32x4_r3_2));
comb_horz_16x8_4 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r3_3),
vmovn_s32(i4_horz_res_32x4_r3_4));
/* populate 2 samples based on current coeffs */
vst1q_s16(pi2_out, comb_horz_16x8_1);
vst1q_s16(pi2_out + 8, comb_horz_16x8_2);
pi2_out += i4_out_res_stride;
vst1q_s16(pi2_out, comb_horz_16x8_3);
vst1q_s16(pi2_out + 8, comb_horz_16x8_4);
pi2_out += i4_out_res_stride;
/* store the coeff 2 to coeff 1 */
/* (used in next iteration) */
i4_horz_samp_32x4_r1_1 = i4_horz_samp_32x4_r2_1;
i4_horz_samp_32x4_r1_2 = i4_horz_samp_32x4_r2_2;
i4_horz_samp_32x4_r1_3 = i4_horz_samp_32x4_r2_3;
i4_horz_samp_32x4_r1_4 = i4_horz_samp_32x4_r2_4;
}
/* Last output row: last scratch row with a rounded shift by 2 */
i4_horz_res_32x4_r1_1 = vrshrq_n_s32(i4_horz_samp_32x4_r1_1, 2);
i4_horz_res_32x4_r1_2 = vrshrq_n_s32(i4_horz_samp_32x4_r1_2, 2);
i4_horz_res_32x4_r1_3 = vrshrq_n_s32(i4_horz_samp_32x4_r1_3, 2);
i4_horz_res_32x4_r1_4 = vrshrq_n_s32(i4_horz_samp_32x4_r1_4, 2);
comb_horz_16x8_1 =
vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_1), vmovn_s32(i4_horz_res_32x4_r1_2));
comb_horz_16x8_2 =
vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_3), vmovn_s32(i4_horz_res_32x4_r1_4));
vst1q_s16(pi2_out, comb_horz_16x8_1);
vst1q_s16(pi2_out + 8, comb_horz_16x8_2);
/* horizontal loop updates */
/* NOTE(review): neither pointer is read again on this path, so these */
/* two increments appear to have no effect */
pi4_ref_array++;
pi2_out_res++;
}
}
else
{
/* ----------------------------------------------------------------- */
/* LOOP over number of blocks */
/* ----------------------------------------------------------------- */
for(i4_blk_ctr = 0; i4_blk_ctr < 4; i4_blk_ctr++)
{
/* if reference layer is not coded then no processing */
if(0 != (u4_ref_nnz & 0x1))
{
int16x8_t i2_coeff1_16x8_r0_0, i2_coeff1_16x8_r0_1;
int16x8_t i2_coeff1_16x8_r1_0, i2_coeff1_16x8_r1_1;
int16x8_t i2_coeff1_16x8_r2_0, i2_coeff1_16x8_r2_1;
int16x8_t i2_coeff1_16x8_r3_0, i2_coeff1_16x8_r3_1;
int16x8_t i2_add_16x8_r0_0;
int16x8_t i2_add_16x8_r1_0;
int16x8_t i2_add_16x8_r2_0;
int16x8_t i2_add_16x8_r3_0;
int16x8_t i2_res_16x8_r0_0, i2_res_16x8_r0_1;
int16x8_t i2_res_16x8_r1_0, i2_res_16x8_r1_1;
int16x8_t i2_res_16x8_r2_0, i2_res_16x8_r2_1;
int16x8_t i2_res_16x8_r3_0, i2_res_16x8_r3_1;
int16x4_t i4_horz_samp_16x4_r0_1, i4_horz_samp_16x4_r0_2;
int16x4_t i4_horz_samp_16x4_r1_1, i4_horz_samp_16x4_r1_2;
int16x4_t i4_horz_samp_16x4_r2_1, i4_horz_samp_16x4_r2_2;
int16x4_t i4_horz_samp_16x4_r3_1, i4_horz_samp_16x4_r3_2;
int32x4_t i4_horz_samp_32x4_r0_1, i4_horz_samp_32x4_r0_2;
int32x4_t i4_horz_samp_32x4_r1_1, i4_horz_samp_32x4_r1_2;
int32x4_t i4_horz_samp_32x4_r2_1, i4_horz_samp_32x4_r2_2;
int32x4_t i4_horz_samp_32x4_r3_1, i4_horz_samp_32x4_r3_2;
int32x4_t i4_horz_add_32x4_r1_1, i4_horz_add_32x4_r1_2;
int32x4_t i4_horz_add_32x4_r2_1, i4_horz_add_32x4_r2_2;
int32x4_t i4_horz_add_32x4_r3_1, i4_horz_add_32x4_r3_2;
int16x4_t i4_horz_res_16x4_r0_1, i4_horz_res_16x4_r0_2;
int16x4_t i4_horz_res_16x4_r1_1, i4_horz_res_16x4_r1_2;
int16x4_t i4_horz_res_16x4_r2_1, i4_horz_res_16x4_r2_2;
int16x4_t i4_horz_res_16x4_r3_1, i4_horz_res_16x4_r3_2;
int16x4_t i4_horz_res_16x4_r4_1, i4_horz_res_16x4_r4_2;
int16x4_t i4_horz_res_16x4_r5_1, i4_horz_res_16x4_r5_2;
int16x4_t i4_horz_res_16x4_r6_1, i4_horz_res_16x4_r6_2;
int16x4_t i4_horz_res_16x4_r7_1, i4_horz_res_16x4_r7_2;
int32x4_t i4_horz_res_32x4_r1_1, i4_horz_res_32x4_r1_2;
int32x4_t i4_horz_res_32x4_r2_1, i4_horz_res_32x4_r2_2;
int32x4_t i4_horz_res_32x4_r3_1, i4_horz_res_32x4_r3_2;
int32x4_t i4_horz_res_32x4_r4_1, i4_horz_res_32x4_r4_2;
int32x4_t i4_horz_res_32x4_r5_1, i4_horz_res_32x4_r5_2;
int32x4_t i4_horz_res_32x4_r6_1, i4_horz_res_32x4_r6_2;
int16x8x2_t ti2_res_16x8x2_r0, ti2_res_16x8x2_r1;
int16x8x2_t ti2_res_16x8x2_r2, ti2_res_16x8x2_r3;
/* ----------- Horizontal Interpolation ---------------- */
/* Four input rows; eight samples are loaded per row although only */
/* the results derived from the first four end up in the scratch */
i2_coeff1_16x8_r0_0 = vld1q_s16(pi2_inp_data);
i2_coeff1_16x8_r1_0 = vld1q_s16(pi2_inp_data + i4_inp_data_stride);
i2_coeff1_16x8_r2_0 = vld1q_s16(pi2_inp_data + (i4_inp_data_stride << 1));
i2_coeff1_16x8_r3_0 =
vld1q_s16(pi2_inp_data + (i4_inp_data_stride << 1) + i4_inp_data_stride);
/* Rotate by one lane so that lane i holds a[i + 1] */
i2_coeff1_16x8_r0_1 = vextq_s16(i2_coeff1_16x8_r0_0, i2_coeff1_16x8_r0_0, 1);
i2_coeff1_16x8_r1_1 = vextq_s16(i2_coeff1_16x8_r1_0, i2_coeff1_16x8_r1_0, 1);
i2_coeff1_16x8_r2_1 = vextq_s16(i2_coeff1_16x8_r2_0, i2_coeff1_16x8_r2_0, 1);
i2_coeff1_16x8_r3_1 = vextq_s16(i2_coeff1_16x8_r3_0, i2_coeff1_16x8_r3_0, 1);
/* a[i] + a[i + 1] */
i2_add_16x8_r0_0 = vaddq_s16(i2_coeff1_16x8_r0_1, i2_coeff1_16x8_r0_0);
i2_add_16x8_r1_0 = vaddq_s16(i2_coeff1_16x8_r1_1, i2_coeff1_16x8_r1_0);
i2_add_16x8_r2_0 = vaddq_s16(i2_coeff1_16x8_r2_1, i2_coeff1_16x8_r2_0);
i2_add_16x8_r3_0 = vaddq_s16(i2_coeff1_16x8_r3_1, i2_coeff1_16x8_r3_0);
/* 2 * a[i] and 2 * a[i + 1] */
i2_coeff1_16x8_r0_0 = vshlq_n_s16(i2_coeff1_16x8_r0_0, 1);
i2_coeff1_16x8_r1_0 = vshlq_n_s16(i2_coeff1_16x8_r1_0, 1);
i2_coeff1_16x8_r2_0 = vshlq_n_s16(i2_coeff1_16x8_r2_0, 1);
i2_coeff1_16x8_r3_0 = vshlq_n_s16(i2_coeff1_16x8_r3_0, 1);
i2_coeff1_16x8_r0_1 = vshlq_n_s16(i2_coeff1_16x8_r0_1, 1);
i2_coeff1_16x8_r1_1 = vshlq_n_s16(i2_coeff1_16x8_r1_1, 1);
i2_coeff1_16x8_r2_1 = vshlq_n_s16(i2_coeff1_16x8_r2_1, 1);
i2_coeff1_16x8_r3_1 = vshlq_n_s16(i2_coeff1_16x8_r3_1, 1);
/* 3 * a[i] + a[i + 1] and a[i] + 3 * a[i + 1] */
i2_res_16x8_r0_0 = vaddq_s16(i2_coeff1_16x8_r0_0, i2_add_16x8_r0_0);
i2_res_16x8_r1_0 = vaddq_s16(i2_coeff1_16x8_r1_0, i2_add_16x8_r1_0);
i2_res_16x8_r2_0 = vaddq_s16(i2_coeff1_16x8_r2_0, i2_add_16x8_r2_0);
i2_res_16x8_r3_0 = vaddq_s16(i2_coeff1_16x8_r3_0, i2_add_16x8_r3_0);
i2_res_16x8_r0_1 = vaddq_s16(i2_coeff1_16x8_r0_1, i2_add_16x8_r0_0);
i2_res_16x8_r1_1 = vaddq_s16(i2_coeff1_16x8_r1_1, i2_add_16x8_r1_0);
i2_res_16x8_r2_1 = vaddq_s16(i2_coeff1_16x8_r2_1, i2_add_16x8_r2_0);
i2_res_16x8_r3_1 = vaddq_s16(i2_coeff1_16x8_r3_1, i2_add_16x8_r3_0);
/* Interleave both phases; only val[0] (from lanes 0..3) is used */
ti2_res_16x8x2_r0 = vzipq_s16(i2_res_16x8_r0_0, i2_res_16x8_r0_1);
ti2_res_16x8x2_r1 = vzipq_s16(i2_res_16x8_r1_0, i2_res_16x8_r1_1);
ti2_res_16x8x2_r2 = vzipq_s16(i2_res_16x8_r2_0, i2_res_16x8_r2_1);
ti2_res_16x8x2_r3 = vzipq_s16(i2_res_16x8_r3_0, i2_res_16x8_r3_1);
/* Second doubling: these registers now hold 4 * a[i] for the edges */
i2_coeff1_16x8_r0_0 = vshlq_n_s16(i2_coeff1_16x8_r0_0, 1);
i2_coeff1_16x8_r1_0 = vshlq_n_s16(i2_coeff1_16x8_r1_0, 1);
i2_coeff1_16x8_r2_0 = vshlq_n_s16(i2_coeff1_16x8_r2_0, 1);
i2_coeff1_16x8_r3_0 = vshlq_n_s16(i2_coeff1_16x8_r3_0, 1);
/* Scratch rows are 8 WORD16 samples wide. Each 8-lane store starts */
/* at column 1 and therefore spills into column 0 of the next row; */
/* the lane stores that follow put 4 * a[0] in column 0 and 4 * a[3] */
/* in column 7. The order of these stores must not be changed. */
vst1q_s16(pi2_refarray_buffer + 1, ti2_res_16x8x2_r0.val[0]);
vst1q_lane_s16(pi2_refarray_buffer, i2_coeff1_16x8_r0_0, 0);
vst1q_lane_s16(pi2_refarray_buffer + 7, i2_coeff1_16x8_r0_0, 3);
vst1q_s16(pi2_refarray_buffer + 9, ti2_res_16x8x2_r1.val[0]);
vst1q_lane_s16(pi2_refarray_buffer + 8, i2_coeff1_16x8_r1_0, 0);
vst1q_lane_s16(pi2_refarray_buffer + 15, i2_coeff1_16x8_r1_0, 3);
vst1q_s16(pi2_refarray_buffer + 17, ti2_res_16x8x2_r2.val[0]);
vst1q_lane_s16(pi2_refarray_buffer + 16, i2_coeff1_16x8_r2_0, 0);
vst1q_lane_s16(pi2_refarray_buffer + 23, i2_coeff1_16x8_r2_0, 3);
vst1q_s16(pi2_refarray_buffer + 25, ti2_res_16x8x2_r3.val[0]);
vst1q_lane_s16(pi2_refarray_buffer + 24, i2_coeff1_16x8_r3_0, 0);
vst1q_lane_s16(pi2_refarray_buffer + 31, i2_coeff1_16x8_r3_0, 3);
/* ----------- Vertical Interpolation ---------------- */
/* Reload the four scratch rows as two 4-lane halves each */
i4_horz_samp_16x4_r0_1 = vld1_s16(pi2_refarray_buffer);
i4_horz_samp_16x4_r0_2 = vld1_s16(pi2_refarray_buffer + 4);
i4_horz_samp_16x4_r1_1 = vld1_s16(pi2_refarray_buffer + 8);
i4_horz_samp_16x4_r1_2 = vld1_s16(pi2_refarray_buffer + 12);
i4_horz_samp_16x4_r2_1 = vld1_s16(pi2_refarray_buffer + 16);
i4_horz_samp_16x4_r2_2 = vld1_s16(pi2_refarray_buffer + 20);
i4_horz_samp_16x4_r3_1 = vld1_s16(pi2_refarray_buffer + 24);
i4_horz_samp_16x4_r3_2 = vld1_s16(pi2_refarray_buffer + 28);
/* Output row 0: first scratch row with a rounded shift by 2 */
i4_horz_res_16x4_r0_1 = vrshr_n_s16(i4_horz_samp_16x4_r0_1, 2);
i4_horz_res_16x4_r0_2 = vrshr_n_s16(i4_horz_samp_16x4_r0_2, 2);
/* Widening sums of vertically adjacent scratch rows */
i4_horz_add_32x4_r1_1 = vaddl_s16(i4_horz_samp_16x4_r0_1, i4_horz_samp_16x4_r1_1);
i4_horz_add_32x4_r1_2 = vaddl_s16(i4_horz_samp_16x4_r0_2, i4_horz_samp_16x4_r1_2);
i4_horz_add_32x4_r2_1 = vaddl_s16(i4_horz_samp_16x4_r1_1, i4_horz_samp_16x4_r2_1);
i4_horz_add_32x4_r2_2 = vaddl_s16(i4_horz_samp_16x4_r1_2, i4_horz_samp_16x4_r2_2);
i4_horz_add_32x4_r3_1 = vaddl_s16(i4_horz_samp_16x4_r2_1, i4_horz_samp_16x4_r3_1);
i4_horz_add_32x4_r3_2 = vaddl_s16(i4_horz_samp_16x4_r2_2, i4_horz_samp_16x4_r3_2);
/* Each scratch row doubled and widened to 32 bit */
i4_horz_samp_32x4_r0_1 = vshll_n_s16(i4_horz_samp_16x4_r0_1, 1);
i4_horz_samp_32x4_r0_2 = vshll_n_s16(i4_horz_samp_16x4_r0_2, 1);
i4_horz_samp_32x4_r1_1 = vshll_n_s16(i4_horz_samp_16x4_r1_1, 1);
i4_horz_samp_32x4_r1_2 = vshll_n_s16(i4_horz_samp_16x4_r1_2, 1);
i4_horz_samp_32x4_r2_1 = vshll_n_s16(i4_horz_samp_16x4_r2_1, 1);
i4_horz_samp_32x4_r2_2 = vshll_n_s16(i4_horz_samp_16x4_r2_2, 1);
i4_horz_samp_32x4_r3_1 = vshll_n_s16(i4_horz_samp_16x4_r3_1, 1);
i4_horz_samp_32x4_r3_2 = vshll_n_s16(i4_horz_samp_16x4_r3_2, 1);
/* Output rows 1..6: 3 * upper + lower, then upper + 3 * lower, */
/* for each of the three pairs of adjacent scratch rows */
i4_horz_res_32x4_r1_1 = vaddq_s32(i4_horz_samp_32x4_r0_1, i4_horz_add_32x4_r1_1);
i4_horz_res_32x4_r1_2 = vaddq_s32(i4_horz_samp_32x4_r0_2, i4_horz_add_32x4_r1_2);
i4_horz_res_32x4_r2_1 = vaddq_s32(i4_horz_samp_32x4_r1_1, i4_horz_add_32x4_r1_1);
i4_horz_res_32x4_r2_2 = vaddq_s32(i4_horz_samp_32x4_r1_2, i4_horz_add_32x4_r1_2);
i4_horz_res_32x4_r3_1 = vaddq_s32(i4_horz_samp_32x4_r1_1, i4_horz_add_32x4_r2_1);
i4_horz_res_32x4_r3_2 = vaddq_s32(i4_horz_samp_32x4_r1_2, i4_horz_add_32x4_r2_2);
i4_horz_res_32x4_r4_1 = vaddq_s32(i4_horz_samp_32x4_r2_1, i4_horz_add_32x4_r2_1);
i4_horz_res_32x4_r4_2 = vaddq_s32(i4_horz_samp_32x4_r2_2, i4_horz_add_32x4_r2_2);
i4_horz_res_32x4_r5_1 = vaddq_s32(i4_horz_samp_32x4_r2_1, i4_horz_add_32x4_r3_1);
i4_horz_res_32x4_r5_2 = vaddq_s32(i4_horz_samp_32x4_r2_2, i4_horz_add_32x4_r3_2);
i4_horz_res_32x4_r6_1 = vaddq_s32(i4_horz_samp_32x4_r3_1, i4_horz_add_32x4_r3_1);
i4_horz_res_32x4_r6_2 = vaddq_s32(i4_horz_samp_32x4_r3_2, i4_horz_add_32x4_r3_2);
/* Rounded shift by 4 with saturating narrowing to 16 bit */
i4_horz_res_16x4_r1_1 = vqrshrn_n_s32(i4_horz_res_32x4_r1_1, 4);
i4_horz_res_16x4_r1_2 = vqrshrn_n_s32(i4_horz_res_32x4_r1_2, 4);
i4_horz_res_16x4_r2_1 = vqrshrn_n_s32(i4_horz_res_32x4_r2_1, 4);
i4_horz_res_16x4_r2_2 = vqrshrn_n_s32(i4_horz_res_32x4_r2_2, 4);
i4_horz_res_16x4_r3_1 = vqrshrn_n_s32(i4_horz_res_32x4_r3_1, 4);
i4_horz_res_16x4_r3_2 = vqrshrn_n_s32(i4_horz_res_32x4_r3_2, 4);
i4_horz_res_16x4_r4_1 = vqrshrn_n_s32(i4_horz_res_32x4_r4_1, 4);
i4_horz_res_16x4_r4_2 = vqrshrn_n_s32(i4_horz_res_32x4_r4_2, 4);
i4_horz_res_16x4_r5_1 = vqrshrn_n_s32(i4_horz_res_32x4_r5_1, 4);
i4_horz_res_16x4_r5_2 = vqrshrn_n_s32(i4_horz_res_32x4_r5_2, 4);
i4_horz_res_16x4_r6_1 = vqrshrn_n_s32(i4_horz_res_32x4_r6_1, 4);
i4_horz_res_16x4_r6_2 = vqrshrn_n_s32(i4_horz_res_32x4_r6_2, 4);
/* Output row 7: last scratch row with a rounded shift by 2 */
i4_horz_res_16x4_r7_1 = vrshr_n_s16(i4_horz_samp_16x4_r3_1, 2);
i4_horz_res_16x4_r7_2 = vrshr_n_s16(i4_horz_samp_16x4_r3_2, 2);
/* Write the 8 output rows of 8 samples for this block */
vst1_s16(pi2_out_res, i4_horz_res_16x4_r0_1);
vst1_s16(pi2_out_res + 4, i4_horz_res_16x4_r0_2);
vst1_s16(pi2_out_res + i4_out_res_stride, i4_horz_res_16x4_r1_1);
vst1_s16(pi2_out_res + i4_out_res_stride + 4, i4_horz_res_16x4_r1_2);
vst1_s16(pi2_out_res + (i4_out_res_stride << 1), i4_horz_res_16x4_r2_1);
vst1_s16(pi2_out_res + (i4_out_res_stride << 1) + 4, i4_horz_res_16x4_r2_2);
vst1_s16(pi2_out_res + (i4_out_res_stride * 3), i4_horz_res_16x4_r3_1);
vst1_s16(pi2_out_res + (i4_out_res_stride * 3) + 4, i4_horz_res_16x4_r3_2);
vst1_s16(pi2_out_res + (i4_out_res_stride << 2), i4_horz_res_16x4_r4_1);
vst1_s16(pi2_out_res + (i4_out_res_stride << 2) + 4, i4_horz_res_16x4_r4_2);
vst1_s16(pi2_out_res + (i4_out_res_stride * 5), i4_horz_res_16x4_r5_1);
vst1_s16(pi2_out_res + (i4_out_res_stride * 5) + 4, i4_horz_res_16x4_r5_2);
vst1_s16(pi2_out_res + (i4_out_res_stride * 6), i4_horz_res_16x4_r6_1);
vst1_s16(pi2_out_res + (i4_out_res_stride * 6) + 4, i4_horz_res_16x4_r6_2);
vst1_s16(pi2_out_res + (i4_out_res_stride * 7), i4_horz_res_16x4_r7_1);
vst1_s16(pi2_out_res + (i4_out_res_stride * 7) + 4, i4_horz_res_16x4_r7_2);
pi2_out_res += BLK8x8SIZE;
}
else
{
/* Block not coded: only step over its output columns */
pi2_out_res += BLK8x8SIZE;
}
/* Block level loop updates */
if(1 == i4_blk_ctr)
{
/* End of the top pair of blocks: return to the left column, move */
/* to the next band of input and output rows, and drop two further */
/* nnz bits so that the bottom pair is tested on bits 4 and 5 */
pi2_inp_data -= SUB_BLK_WIDTH_4x4;
pi2_inp_data += (i4_inp_data_stride * SUB_BLK_HEIGHT_4x4);
pi2_out_res -= MB_SIZE;
pi2_out_res += (i4_out_res_stride * BLK8x8SIZE);
u4_ref_nnz >>= 2;
}
else
{
/* Step to the block on the right */
/* NOTE(review): a horizontal step expressed with the HEIGHT macro; */
/* presumably width and height are equal - confirm */
pi2_inp_data += SUB_BLK_HEIGHT_4x4;
}
u4_ref_nnz >>= 1;
}
/* The above loop iterates over all the blocks */
}
}
/**
*******************************************************************************
*
* @brief Sum of absolute differences between the source and the sum of
* prediction and residual prediction
*
* @par Description: Accumulates |src - pred - res| over a block of
* u4_mb_wd x u4_mb_ht samples. Blocks that are 16 samples wide with a height
* that is a multiple of 8 are handled with NEON, four rows per pass; every
* other size is handled sample by sample.
*
* @param[in] ps_src
* Source samples (read as UWORD8) and their stride
*
* @param[in] ps_pred
* Prediction samples (read as UWORD8) and their stride
*
* @param[in] ps_res
* Residual prediction samples (read as WORD16) and their stride
*
* @param[in] u4_mb_wd
* Block width in samples
*
* @param[in] u4_mb_ht
* Block height in samples
*
* @returns Accumulated SAD
*
* @remarks In the NEON path the absolute values of four rows are summed in
* 16-bit lanes before being widened to 32 bit
*
*******************************************************************************
*/
UWORD32 isvce_get_sad_with_residual_pred_neon(buffer_container_t *ps_src,
                                              buffer_container_t *ps_pred,
                                              buffer_container_t *ps_res, UWORD32 u4_mb_wd,
                                              UWORD32 u4_mb_ht)
{
    UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;

    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_res_stride = ps_res->i4_data_stride;

    UWORD32 u4_row, u4_col;
    UWORD32 u4_sad = 0;

    if((16 == u4_mb_wd) && (0 == (u4_mb_ht % 8)))
    {
        /* Two passes of four rows cover each group of eight rows */
        UWORD32 u4_num_passes = (u4_mb_ht / 8) * 2;
        UWORD32 u4_pass;

        for(u4_pass = 0; u4_pass < u4_num_passes; u4_pass++)
        {
            int16x8_t abs_sum_16x8 = vdupq_n_s16(0);
            int32x4_t abs_sum_32x4;
            int32x2_t abs_sum_32x2;

            for(u4_row = 0; u4_row < 4; u4_row++)
            {
                /* src - pred for the left and right 8 samples of the row */
                int16x8_t diff_lo_16x8 =
                    vreinterpretq_s16_u16(vsubl_u8(vld1_u8(pu1_src), vld1_u8(pu1_pred)));
                int16x8_t diff_hi_16x8 =
                    vreinterpretq_s16_u16(vsubl_u8(vld1_u8(pu1_src + 8), vld1_u8(pu1_pred + 8)));

                /* remove the residual prediction */
                diff_lo_16x8 = vsubq_s16(diff_lo_16x8, vld1q_s16(pi2_res));
                diff_hi_16x8 = vsubq_s16(diff_hi_16x8, vld1q_s16(pi2_res + 8));

                abs_sum_16x8 = vaddq_s16(abs_sum_16x8, vabsq_s16(diff_lo_16x8));
                abs_sum_16x8 = vaddq_s16(abs_sum_16x8, vabsq_s16(diff_hi_16x8));

                pu1_src += i4_src_stride;
                pu1_pred += i4_pred_stride;
                pi2_res += i4_res_stride;
            }

            /* Reduce the eight 16-bit lanes to two 32-bit partial sums */
            abs_sum_32x4 = vpaddlq_s16(abs_sum_16x8);
            abs_sum_32x2 = vpadd_s32(vget_low_s32(abs_sum_32x4), vget_high_s32(abs_sum_32x4));

            u4_sad += vget_lane_s32(abs_sum_32x2, 0);
            u4_sad += vget_lane_s32(abs_sum_32x2, 1);
        }

        return u4_sad;
    }

    /* Generic path for all other block sizes */
    for(u4_row = 0; u4_row < u4_mb_ht; u4_row++)
    {
        for(u4_col = 0; u4_col < u4_mb_wd; u4_col++)
        {
            WORD16 i2_src_smpl = pu1_src[u4_col + u4_row * i4_src_stride];
            WORD16 i2_pred_smpl = pu1_pred[u4_col + u4_row * i4_pred_stride];
            WORD16 i2_res_smpl = pi2_res[u4_col + u4_row * i4_res_stride];

            u4_sad += ABS(i2_src_smpl - i2_pred_smpl - i2_res_smpl);
        }
    }

    return u4_sad;
}

View file

@ -16,7 +16,7 @@
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
*/
#ifndef _RATE_CONTROL_API_STRUCTS_H_
#define _RATE_CONTROL_API_STRUCTS_H_
@ -74,7 +74,9 @@ typedef struct rate_control_api_t
UWORD8 u1_is_first_frm;
UWORD8 au1_min_max_qp[(MAX_PIC_TYPE << 1)];
UWORD8 au1_min_max_qp[MAX_PIC_TYPE * 2];
UWORD8 au1_min_max_avc_qp[MAX_PIC_TYPE * 2];
WORD32 i4_prev_frm_est_bits;
@ -89,5 +91,4 @@ typedef struct rate_control_api_t
} rate_control_api_t;
#endif/*_RATE_CONTROL_API_STRUCTS_H_*/
#endif /*_RATE_CONTROL_API_STRUCTS_H_*/

View file

@ -0,0 +1,80 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_function_selector.c
*
* @brief
* Contains functions to initialize function pointers used in svc
*
* @author
* Ittiam
*
* @par List of Functions:
*
* @remarks
* None
*
*******************************************************************************
*/
#include "iv2.h"
#include "isvce_structs.h"
/**
*******************************************************************************
*
* @brief Initializes the function pointers of the codec context
*
* @par Description: This variant performs no architecture specific selection;
* it forwards the codec context to isvce_init_function_ptr_generic()
*
* @param[in] ps_codec
* Codec context pointer
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_init_function_ptr(isvce_codec_t *ps_codec)
{
    /* Only the generic initialization is performed here */
    isvce_init_function_ptr_generic(ps_codec);
}
/**
*******************************************************************************
*
* @brief Reports the default architecture of the executing environment
*
* @par Description: This variant does not identify a specific architecture
* and always reports ARCH_NA
*
* @param[in] void
*
* @returns IV_ARCH_T
* ARCH_NA
*
* @remarks none
*
*******************************************************************************
*/
IV_ARCH_T isvce_default_arch(void)
{
    /* No architecture is identified by this variant */
    return ARCH_NA;
}

View file

@ -0,0 +1,103 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_platform_macros.h
*
* @brief
* Contains platform specific routines used for codec context initialization
*
* @author
* ittiam
*
* @remarks
* none
*
*******************************************************************************
*/
#ifndef _ISVCE_PLATFORM_MACROS_H_
#define _ISVCE_PLATFORM_MACROS_H_
/*****************************************************************************/
/* Extern Function Declarations */
/*****************************************************************************/
/**
*******************************************************************************
*
* @brief Initialize the intra/inter/transform/deblk function pointers of
* codec context
*
* @par Description: the current routine initializes the function pointers of
* codec context basing on the architecture in use
*
* @param[in] ps_codec
* Codec context pointer
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec);
/**
*******************************************************************************
*
* @brief Initialize the intra/inter/transform/deblk function pointers of
* codec context
*
* @par Description: the current routine initializes the function pointers of
* codec context basing on the architecture in use
*
* @param[in] ps_codec
* Codec context pointer
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_init_function_ptr(isvce_codec_t *ps_codec);
/**
*******************************************************************************
*
* @brief Determine the architecture of the encoder executing environment
*
* @par Description: This routine returns the architecture of the
* environment in which the current encoder is being tested
*
* @param[in] void
*
* @returns IV_ARCH_T
* architecture
*
* @remarks none
*
*******************************************************************************
*/
IV_ARCH_T isvce_default_arch(void);
#endif

View file

@ -0,0 +1,116 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/*****************************************************************************/
/* Includes */
/*****************************************************************************/
/* System include files */
#include "stdio.h"
/* User include files */
#include "irc_datatypes.h"
#include "irc_common.h"
#include "irc_cntrl_param.h"
#include "irc_mem_req_and_acq.h"
#include "irc_rd_model.h"
#include "irc_est_sad.h"
#include "irc_fixed_point_error_bits.h"
#include "irc_vbr_storage_vbv.h"
#include "irc_picture_type.h"
#include "irc_bit_allocation.h"
#include "irc_mb_model_based.h"
#include "irc_cbr_buffer_control.h"
#include "irc_vbr_str_prms.h"
#include "irc_rate_control_api.h"
#include "irc_rate_control_api_structs.h"
#include "irc_trace_support.h"
/* Smaller / larger of two values; note that each argument may be evaluated twice */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define DEV_Q 4        /* Q format (shift) for the deviation range factors */
#define HI_DEV_FCTR 22 /* ~1.4 * 16 */
#define LO_DEV_FCTR 12 /* 0.75 * 16 */
/* Upper / lower deviation limit for a QP: Qprev scaled by the factor above, */
/* rounded to nearest in Q4. The argument is parenthesised so that the cast  */
/* applies to the whole expression passed in, not only to its first operand. */
#define GET_HI_DEV_QP(Qprev) ((((WORD32) (Qprev)) * HI_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
#define GET_LO_DEV_QP(Qprev) ((((WORD32) (Qprev)) * LO_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
/* Clamp Qc to [lo_d, hi_d]; the lower limit is checked first */
#define CLIP_QP(Qc, hi_d, lo_d) (((Qc) < (lo_d)) ? ((lo_d)) : (((Qc) > (hi_d)) ? (hi_d) : (Qc)))
/*******************************************************************************
* Description : Gets the frame level qp for the given picture type
* based on bits per pixel and gradient per pixel
******************************************************************************/
/* Get frame level QP based on BPP and GPP */
UWORD8 irc_get_frame_level_init_qp(rate_control_handle *ps_rate_control_api, rc_type_e e_rc_type,
picture_type_e e_pic_type, DOUBLE d_bpp, DOUBLE d_gpp)
{
DOUBLE d_frame_qp;
UWORD8 u1_min_qp =
((rate_control_api_t *) (ps_rate_control_api))->au1_min_max_avc_qp[(e_pic_type << 1)];
UWORD8 u1_max_qp =
((rate_control_api_t *) (ps_rate_control_api))->au1_min_max_avc_qp[(e_pic_type << 1) + 1];
if((e_rc_type != VBR_STORAGE) && (e_rc_type != VBR_STORAGE_DVD_COMP) &&
(e_rc_type != CBR_NLDRC) && (e_rc_type != CONST_QP) && (e_rc_type != VBR_STREAMING))
{
trace_printf(
(const WORD8 *) (const WORD8 *) " Only VBR,NLDRC and CONST QP supported for now \n");
return (0);
}
if(d_bpp <= 0.18)
{
d_frame_qp = 43.49 + (0.59 * d_gpp) - (106.45 * d_bpp);
}
else if(d_bpp <= 0.6)
{
d_frame_qp = 25.12 + (0.69 * d_gpp) - (29.23 * (d_bpp - 0.18));
}
else
{
d_frame_qp = 13.93 + (0.74 * d_gpp) - (18.4 * (d_bpp - 0.6));
}
/* Truncating the QP to the Max and Min Qp values possible */
if(d_frame_qp < u1_min_qp) d_frame_qp = u1_min_qp;
if(d_frame_qp > u1_max_qp) d_frame_qp = u1_max_qp;
return ((UWORD8) (d_frame_qp + 0.5));
}
/*******************************************************************************
 * Description : Replaces both QP limit tables held in the rate control state
 *               (au1_min_max_qp and au1_min_max_avc_qp) with the caller's
 *               tables. Two consecutive entries are copied per picture type,
 *               i.e. 2 * MAX_PIC_TYPE entries per table.
 ******************************************************************************/
void irc_change_qp_constraints(rate_control_api_t *ps_rate_control_api, UWORD8 *pu1_min_max_qp,
                               UWORD8 *pu1_min_max_avc_qp)
{
    const WORD32 i4_num_entries = 2 * (MAX_PIC_TYPE);
    WORD32 i4_idx;

    for(i4_idx = 0; i4_idx < i4_num_entries; i4_idx++)
    {
        ps_rate_control_api->au1_min_max_qp[i4_idx] = pu1_min_max_qp[i4_idx];
        ps_rate_control_api->au1_min_max_avc_qp[i4_idx] = pu1_min_max_avc_qp[i4_idx];
    }
}
/*******************************************************************************
 * Description : Returns the u1_scd_detected flag stored in the rate control
 *               state.
 ******************************************************************************/
UWORD8 irc_is_scenecut(rate_control_api_t *ps_rate_control_api)
{
    const UWORD8 u1_scd_flag = ps_rate_control_api->u1_scd_detected;

    return u1_scd_flag;
}

View file

@ -0,0 +1,46 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef _IRC_SVC_RATE_CONTROL_API_H_
#define _IRC_SVC_RATE_CONTROL_API_H_
/* Dependencies of 'irc_rate_control_api_structs' */
#include "irc_picture_type.h"
#include "irc_rd_model.h"
#include "irc_vbr_storage_vbv.h"
#include "irc_est_sad.h"
#include "irc_bit_allocation.h"
#include "irc_mb_model_based.h"
#include "irc_cbr_buffer_control.h"
#include "irc_vbr_str_prms.h"
#include "irc_common.h"
#include "irc_rate_control_api_structs.h"
/* Get frame level QP based on BPP and GPP.
 * Returns 0 when e_rc_type is not one of VBR_STORAGE, VBR_STORAGE_DVD_COMP,
 * CBR_NLDRC, CONST_QP or VBR_STREAMING; otherwise the modelled QP clamped to
 * the min/max AVC QP limits stored for e_pic_type and rounded to nearest. */
UWORD8 irc_get_frame_level_init_qp(rate_control_api_t *ps_rate_control_api, rc_type_e e_rc_type,
picture_type_e e_pic_type, DOUBLE d_bpp, DOUBLE d_gpp);
/* Overwrite the au1_min_max_qp and au1_min_max_avc_qp tables of the rate
 * control state with the supplied tables; each table must provide
 * 2 * MAX_PIC_TYPE entries (two consecutive entries per picture type). */
void irc_change_qp_constraints(rate_control_api_t *ps_rate_control_api, UWORD8 *pu1_min_max_qp,
UWORD8 *pu1_min_max_avc_qp);
/* Return the u1_scd_detected flag held in the rate control state */
extern UWORD8 irc_is_scenecut(rate_control_api_t *ps_rate_control_api);
#endif

1023
encoder/svc/isvce.h Normal file

File diff suppressed because it is too large Load diff

6054
encoder/svc/isvce_api.c Normal file

File diff suppressed because it is too large Load diff

753
encoder/svc/isvce_cabac.c Normal file
View file

@ -0,0 +1,753 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_cabac.c
*
* @brief
* Contains all leaf level functions for CABAC entropy coding.
*
*
* @author
* Doney Alex
*
* @par List of Functions:
*
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System include files */
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
/* User include files */
#include "ih264e_config.h"
#include "ih264_typedefs.h"
#include "iv2.h"
#include "ive2.h"
#include "ih264_debug.h"
#include "ih264_macros.h"
#include "isvc_defs.h"
#include "isvce_defs.h"
#include "isvc_macros.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
#include "ime_defs.h"
#include "ime_structs.h"
#include "ih264_error.h"
#include "isvc_structs.h"
#include "isvc_trans_quant_itrans_iquant.h"
#include "isvc_inter_pred_filters.h"
#include "isvc_mem_fns.h"
#include "ih264_padding.h"
#include "ih264_platform_macros.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "isvc_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "isvce_rate_control.h"
#include "isvce_cabac_structs.h"
#include "isvce_structs.h"
#include "isvce_cabac.h"
#include "isvce_encode_header.h"
#include "ih264_cavlc_tables.h"
#include "ih264e_statistics.h"
#include "ih264e_trace.h"
/*****************************************************************************/
/* Function Definitions */
/*****************************************************************************/
/**
 *******************************************************************************
 *
 * @brief
 *  k-th order Exp-Golomb (UEGk) binarization process: implements the
 *  concatenated unary / k-th order Exp-Golomb (UEGk) binarization process,
 *  where k = 0, as defined in 9.3.2.3 of ITU_T_H264-201402
 *
 * @param[in] i2_sufs
 *  Suffix value to be binarized
 *
 * @param[out] pi1_bins_len
 *  Receives the length, in bins, of the returned bin string
 *  (2 * prefix length - 1)
 *
 * @returns Binarized value: unary prefix in the upper bits followed by the
 *  low-order bits of (i2_sufs + 1)
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
UWORD32 isvce_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len)
{
    /* value actually coded is the suffix plus one */
    const UWORD32 u4_value = i2_sufs + 1;
    /* number of significant bits of the value (one more when the value is 0) */
    const WORD32 i4_prefix_len = (32 - CLZ(u4_value) + (0 == u4_value));
    const WORD32 i4_suffix_len = i4_prefix_len - 1;
    /* unary code with (i4_prefix_len - 1) '1's and a terminating '0' bin */
    const UWORD32 u4_prefix_bins = (1 << i4_prefix_len) - 2;
    /* low-order (i4_prefix_len - 1) bits of the value */
    const UWORD32 u4_suffix_bins = u4_value & ((1 << i4_suffix_len) - 1);

    /* length of the code = 2 * (prefix length - 1) + 1 + k, with k = 0 */
    *pi1_bins_len = (2 * i4_prefix_len) - 1;

    return ((u4_prefix_bins << i4_suffix_len) | u4_suffix_bins);
}
/**
 *******************************************************************************
 *
 * @brief
 *  Get cabac context for the MB: sets the pointers to the top and left cabac
 *  neighbor context depending upon neighbor availability, and resets the
 *  context of unavailable neighbors to default values.
 *
 * @par Description
 *  The left neighbor is available when the MB is not in column 0 and the MB
 *  to its left carries the same slice index; the top neighbor is available
 *  when the MB is not in row 0 and the MB above carries the same slice index.
 *  Unavailable neighbors point at the default context entry.
 *
 * @param[in] ps_ent_ctxt
 *  Pointer to entropy context structure
 *
 * @param[in] u4_mb_type
 *  Type of MB; I16x16, I8x8 and I4x4 are treated as intra
 *
 * @returns none
 *
 * @remarks
 *  The 4-byte left ref_idx ctxt_inc entry is cleared with memset() instead of
 *  a UWORD32 store through a cast WORD8 pointer, which avoids relying on the
 *  alignment of the array and on type-punned access.
 *
 *******************************************************************************
 */
void isvce_get_cabac_context(isvce_entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type)
{
    /* CABAC context */
    isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
    isvce_mb_info_ctxt_t *ps_ctx_inc_mb_map = ps_cabac_ctxt->ps_mb_map_ctxt_inc;
    cab_csbp_t *ps_lft_csbp = ps_cabac_ctxt->ps_lft_csbp;
    WORD32 i4_mb_x = ps_ent_ctxt->i4_mb_x;
    WORD32 i4_mb_y = ps_ent_ctxt->i4_mb_y;
    /* slice index entries of the current MB row */
    UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx + (i4_mb_y * ps_ent_ctxt->i4_wd_mbs);
    WORD32 i4_is_intra = ((u4_mb_type == I16x16) || (u4_mb_type == I8x8) || (u4_mb_type == I4x4));
    WORD32 i4_lft_avail, i4_top_avail;

    /* left macroblock availability; the index is only read when mb_x > 0 */
    i4_lft_avail = (i4_mb_x == 0 || (pu1_slice_idx[i4_mb_x - 1] != pu1_slice_idx[i4_mb_x])) ? 0 : 1;

    /* top macroblock availability; the previous row is only read when mb_y > 0 */
    i4_top_avail = (i4_mb_y == 0 ||
                    (pu1_slice_idx[i4_mb_x - ps_ent_ctxt->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))
                       ? 0
                       : 1;

    /* current MB entry; neighbors start out at the default entry */
    ps_cabac_ctxt->ps_curr_ctxt_mb_info = ps_ctx_inc_mb_map + i4_mb_x;
    ps_cabac_ctxt->ps_left_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info;
    ps_cabac_ctxt->ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info;

    /* left neighbor bookkeeping pointers */
    ps_cabac_ctxt->pu1_left_y_ac_csbp = &ps_lft_csbp->u1_y_ac_csbp_top_mb;
    ps_cabac_ctxt->pu1_left_uv_ac_csbp = &ps_lft_csbp->u1_uv_ac_csbp_top_mb;
    ps_cabac_ctxt->pu1_left_yuv_dc_csbp = &ps_lft_csbp->u1_yuv_dc_csbp_top_mb;
    ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc = &ps_cabac_ctxt->i1_left_ref_idx_ctx_inc_arr[0][0];
    ps_cabac_ctxt->pu1_left_mv_ctxt_inc = ps_cabac_ctxt->u1_left_mv_ctxt_inc_arr[0];

    if(i4_lft_avail)
    {
        ps_cabac_ctxt->ps_left_ctxt_mb_info = ps_cabac_ctxt->ps_curr_ctxt_mb_info - 1;
    }

    if(i4_top_avail)
    {
        /* the top pointer is set to the same entry as the current MB */
        ps_cabac_ctxt->ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_curr_ctxt_mb_info;
    }

    if(!i4_lft_avail)
    {
        UWORD8 u1_def_csbp = i4_is_intra ? 0xf : 0;

        *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = u1_def_csbp;
        *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = u1_def_csbp;
        *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = u1_def_csbp;

        /* clear the same four bytes the former UWORD32 store cleared */
        memset(ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc, 0, sizeof(UWORD32));
        memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
    }

    if(!i4_top_avail)
    {
        UWORD8 u1_def_csbp = i4_is_intra ? 0xff : 0;

        /* ps_top_ctxt_mb_info still points at the default entry here */
        ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_ac_csbp = u1_def_csbp;
        ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_dc_csbp = u1_def_csbp;

        ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[0] = 0;
        ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[1] = 0;
        ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[2] = 0;
        ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[3] = 0;
        memset(ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv, 0, 16);
    }
}
/**
 *******************************************************************************
 * @brief
 *  flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402).
 *
 * @par Description
 *  1. Adds a pending carry to the last byte already written to the stream
 *  2. Writes the outstanding bytes (0x00 when a carry was present, else 0xFF)
 *  3. Writes the remaining bits of low together with the trailing stop bits
 *  4. Stores the new stream offset and resets the bitstream scratch state
 *
 * @param[inout] ps_cabac_ctxt
 *  pointer to cabac context (handle)
 *
 * @returns none
 *
 * @remarks
 *  The carry path only looks back into the stream buffer when enough bytes
 *  have been written (offset > 3 for the four-byte EPB check, offset > 0 for
 *  the carry add), matching the guards in isvce_cabac_put_byte(). Without them
 *  the unsigned offset arithmetic would index far outside the buffer.
 *
 *******************************************************************************
 */
void isvce_cabac_flush(isvce_cabac_ctxt_t *ps_cabac_ctxt)
{
    /* bit stream ptr */
    bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm;
    encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env);
    UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
    UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen;
    UWORD8 *pu1_strm_buf = ps_stream->pu1_strm_buffer;
    UWORD32 u4_strm_buf_offset = ps_stream->u4_strm_buf_offset;
    WORD32 zero_run = ps_stream->i4_zero_bytes_run;
    UWORD32 u4_out_standing_bytes = ps_cab_enc_env->u4_out_standing_bytes;
    /* carry = 1 => putbit(1); carry propagated due to L renorm */
    WORD32 carry = (u4_low >> (u4_bits_gen + CABAC_BITS)) & 0x1;
    /* outstanding 0xFF bytes roll over to 0x00 when a carry ripples through */
    UWORD8 u1_0_or_ff = carry ? 0 : 0xFF;
    WORD32 last_byte;
    WORD32 bits_left;
    WORD32 rem_bits;

    /************************************************************************/
    /* Insert the carry (propagated in previous byte) along with            */
    /* outstanding bytes (if any) and flush remaining bits                  */
    /************************************************************************/
    if(carry)
    {
        /* CORNER CASE: if the previous data is 0x000003, then EPB will be
           inserted and the data will become 0x00000303 and if the carry is
           present, it will be added with the last byte and it will become
           0x00000304 which is not correct as per standard */
        /* so check for previous four bytes and if it is equal to 0x00000303
           then subtract u4_strm_buf_offset by 1 */
        if((u4_strm_buf_offset > 3) && (pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03) &&
           (pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03) &&
           (pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00) &&
           (pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00))
        {
            u4_strm_buf_offset -= 1;
        }

        /* previous byte carry add will not result in overflow to */
        /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes */
        if(u4_strm_buf_offset > 0)
        {
            pu1_strm_buf[u4_strm_buf_offset - 1] += carry;
            zero_run = 0;
        }
    }

    /* Insert outstanding bytes (if any) */
    while(u4_out_standing_bytes)
    {
        PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run);
        u4_out_standing_bytes--;
    }

    /* clear the carry in low */
    u4_low &= ((1 << (u4_bits_gen + CABAC_BITS)) - 1);

    /* extract the remaining bits; */
    /* includes additional msb bit of low as per Figure 9-12 */
    bits_left = u4_bits_gen + 1;
    rem_bits = (u4_low >> (u4_bits_gen + CABAC_BITS - bits_left));

    if(bits_left >= 8)
    {
        last_byte = (rem_bits >> (bits_left - 8)) & 0xFF;
        PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run);
        bits_left -= 8;
    }

    /* insert last byte along with rbsp stop bit(1) and 0's in the end */
    /* NOTE(review): if bits_left can be 7 here, the second shift count below
       is -1 - confirm the reachable range of u4_bits_gen at flush time */
    last_byte =
        (rem_bits << (8 - bits_left)) | (1 << (7 - bits_left) | (1 << (7 - bits_left - 1)));
    last_byte &= 0xFF;
    PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run);

    /* update the state variables and return success */
    ps_stream->u4_strm_buf_offset = u4_strm_buf_offset;
    ps_stream->i4_zero_bytes_run = 0;

    /* Default init values for scratch variables of bitstream context */
    ps_stream->u4_cur_word = 0;
    ps_stream->i4_bits_left_in_cw = WORD_SIZE;
}
/**
 ******************************************************************************
 *
 * @brief Puts new byte (and outstanding bytes) into bitstream after cabac
 *        renormalization
 *
 * @par Description
 *  1. Extract the leading byte of low(L)
 *  2. If leading byte=0xff increment outstanding bytes and return
 *     (as the actual bits depend on carry propagation later)
 *  3. If leading byte is not 0xff check for any carry propagation
 *  4. Insert the carry (propagated in previous byte) along with outstanding
 *     bytes (if any) and leading byte
 *
 * @param[inout] ps_cabac_ctxt
 *  pointer to cabac context (handle)
 *
 * @return none
 *
 ******************************************************************************
 */
void isvce_cabac_put_byte(isvce_cabac_ctxt_t *ps_cabac_ctxt)
{
    bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm;
    encoding_envirnoment_t *ps_env = &(ps_cabac_ctxt->s_cab_enc_env);
    UWORD32 u4_pending_bits = ps_env->u4_bits_gen;
    /* leading byte of low; bit 8 (if set) is the carry out of that byte */
    WORD32 i4_lead = ps_env->u4_code_int_low >> (u4_pending_bits + CABAC_BITS - 8);
    WORD32 i4_carry = (i4_lead >> 8) & 0x1;
    /* outstanding 0xFF bytes become 0x00 once a carry ripples through them */
    UWORD8 u1_fill_byte = i4_carry ? 0 : 0xFF;
    UWORD8 *pu1_buf = ps_stream->pu1_strm_buffer;
    UWORD32 u4_wr_offset = ps_stream->u4_strm_buf_offset;
    WORD32 i4_zero_run = ps_stream->i4_zero_bytes_run;
    UWORD32 u4_num_outstanding = ps_env->u4_out_standing_bytes;

    /* Sanity checks */
    ASSERT((ps_env->u4_code_int_range >= 256) && (ps_env->u4_code_int_range < 512));
    ASSERT((u4_pending_bits >= 8));

    /* the leading byte is consumed: drop it from low and from the bit count */
    u4_pending_bits -= 8;
    ps_env->u4_code_int_low &= ((1 << (CABAC_BITS + u4_pending_bits)) - 1);
    ps_env->u4_bits_gen = u4_pending_bits;

    if(0xff == i4_lead)
    {
        /* cannot be emitted yet: its final value depends on a later carry */
        ps_env->u4_out_standing_bytes++;
        return;
    }

    /*********************************************************************/
    /* Insert the carry propagated in previous byte                      */
    /*                                                                   */
    /* Note : Do not worry about corruption into slice header align byte */
    /*        This is because the first bin cannot result in overflow    */
    /*********************************************************************/
    if(i4_carry)
    {
        /* CORNER CASE: previous data 0x000003 gets an EPB inserted and becomes
           0x00000303; adding the carry to the last byte would then give
           0x00000304, which is not correct as per standard. So when the last
           four bytes are 0x00000303, step the offset back by one first. */
        if((u4_wr_offset > 3) && (0x03 == pu1_buf[u4_wr_offset - 1]) &&
           (0x03 == pu1_buf[u4_wr_offset - 2]) && (0x00 == pu1_buf[u4_wr_offset - 3]) &&
           (0x00 == pu1_buf[u4_wr_offset - 4]))
        {
            u4_wr_offset -= 1;
        }

        /* the carry add cannot overflow into the byte before it because */
        /* 0xff bytes are tracked as outstanding bytes                   */
        if(u4_wr_offset > 0)
        {
            pu1_buf[u4_wr_offset - 1] += i4_carry;
            i4_zero_run = 0;
        }
    }

    /* Insert outstanding bytes (if any) */
    for(; u4_num_outstanding; u4_num_outstanding--)
    {
        PUTBYTE_EPB(pu1_buf, u4_wr_offset, u1_fill_byte, i4_zero_run);
    }
    ps_env->u4_out_standing_bytes = 0;

    /* Insert the leading byte */
    i4_lead &= 0xFF;
    PUTBYTE_EPB(pu1_buf, u4_wr_offset, i4_lead, i4_zero_run);

    /* update the state variables */
    ps_stream->u4_strm_buf_offset = u4_wr_offset;
    ps_stream->i4_zero_bytes_run = i4_zero_run;
}
/**
 ******************************************************************************
 *
 * @brief Codes a bin based on probability and mps packed context model
 *
 * @par Description
 *  1. Apart from encoding bin, context model is updated as per state transition
 *  2. Range and Low renormalization is done based on bin and original state
 *  3. After renorm bitstream is updated (if required)
 *
 * @param[inout] ps_cabac
 *  pointer to cabac context (handle)
 *
 * @param[in] bin
 *  bin(boolean) to be encoded
 *
 * @param[inout] pu1_bin_ctxts
 *  pointer to the cabac context model containing pState[bits 5-0] | MPS[bit6];
 *  updated in place with the next state
 *
 * @return none
 *
 ******************************************************************************
 */
void isvce_cabac_encode_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin, bin_ctxt_model *pu1_bin_ctxts)
{
    encoding_envirnoment_t *ps_env = &(ps_cabac->s_cab_enc_env);
    UWORD32 u4_range = ps_env->u4_code_int_range;
    UWORD32 u4_low = ps_env->u4_code_int_low;
    /* unpack the context model: probability state and most probable symbol */
    UWORD8 u1_state = (*pu1_bin_ctxts) & 0x3F;
    UWORD8 u1_mps = !!((*pu1_bin_ctxts) & (0x40));
    UWORD32 u4_lut_entry;
    UWORD32 u4_rlps;
    WORD32 i4_renorm_shift;

    /* Sanity checks */
    ASSERT((bin == 0) || (bin == 1));
    ASSERT((u4_range >= 256) && (u4_range < 512));

    /* LUT entry for this state and quantized range: bits 7-0 hold the lps */
    /* range, bits 13-8 the next state for mps, bits 20-15 that for lps     */
    u4_lut_entry = gau4_isvc_cabac_table[u1_state][(u4_range >> 6) & 0x3];
    u4_rlps = u4_lut_entry & 0xFF;
    u4_range -= u4_rlps;

    if(bin == u1_mps)
    {
        /* mps path: only the state advances */
        u1_state = (u4_lut_entry >> 8) & 0x3F;
    }
    else
    {
        /* lps path; L = L + R; R = RLPS */
        u4_low += u4_range;
        u4_range = u4_rlps;

        if(0 == u1_state)
        {
            /* MPS(CtxIdx) = 1 - MPS(CtxIdx) */
            u1_mps = !u1_mps;
        }

        u1_state = (u4_lut_entry >> 15) & 0x3F;
    }

    /* write the updated model back in packed form */
    (*pu1_bin_ctxts) = (u1_mps << 6) | u1_state;

    /* Renormalization: shift range back up to 9 significant bits and shift */
    /* low by the same amount                                               */
    GETRANGE(i4_renorm_shift, u4_range);
    i4_renorm_shift = 9 - i4_renorm_shift;
    u4_low <<= i4_renorm_shift;
    u4_range <<= i4_renorm_shift;

    /* bits to be inserted in the bitstream */
    ps_env->u4_bits_gen += i4_renorm_shift;
    ps_env->u4_code_int_range = u4_range;
    ps_env->u4_code_int_low = u4_low;

    /* generate stream when a byte is ready */
    if(ps_env->u4_bits_gen > CABAC_BITS)
    {
        isvce_cabac_put_byte(ps_cabac);
    }
}
/**
 *******************************************************************************
 *
 * @brief
 *  Encoding process for a binary decision: implements the encoding process of
 *  a decision as defined in 9.3.4.2. This function encodes multiple bins of a
 *  symbol. Implements flowchart Figure 9-7 (ITU_T_H264-201402)
 *
 * @param[in] u4_bins
 *  bin values packed one per bit; bin0 is the LSB and is encoded first
 *
 * @param[in] i1_bins_len
 *  Length of bins, maximum 32
 *
 * @param[in] u4_ctx_inc
 *  CtxInc values packed one per nibble: bits 3-0 for bin0, bits 7-4 for
 *  bin1 ..
 *
 * @param[in] i1_valid_len
 *  bin i uses nibble min(i, i1_valid_len) of u4_ctx_inc, i.e. CtxInc stays
 *  constant from bin i1_valid_len onwards
 *
 * @param[in] pu1_bin_ctxt_type
 *  Pointer to binary contexts; CtxInc is added to it to select the model
 *
 * @param[inout] ps_cabac
 *  Pointer to cabac_context_structure
 *
 * @returns none
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void isvce_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len, UWORD32 u4_ctx_inc,
                                WORD8 i1_valid_len, bin_ctxt_model *pu1_bin_ctxt_type,
                                isvce_cabac_ctxt_t *ps_cabac)
{
    WORD8 i1_bin_idx;

    for(i1_bin_idx = 0; i1_bin_idx < i1_bins_len; i1_bin_idx++)
    {
        /* next bin is the current LSB; its CtxInc is the current low nibble */
        const UWORD8 u1_cur_bin = (u4_bins & 0x01);
        const UWORD8 u1_cur_ctx_inc = u4_ctx_inc & 0x0f;

        u4_bins >>= 1;

        /* move on to the next nibble only while within the valid length */
        if(i1_bin_idx < i1_valid_len)
        {
            u4_ctx_inc >>= 4;
        }

        /* Encode the bin */
        isvce_cabac_encode_bin(ps_cabac, u1_cur_bin, pu1_bin_ctxt_type + u1_cur_ctx_inc);
    }
}
/**
 *******************************************************************************
 * @brief
 *  Encoding process for a binary decision before termination: encoding process
 *  of a termination (9.3.4.5 :ITU_T_H264-201402). Explained in flowchart 9-11.
 *
 * @param[inout] ps_cabac
 *  Pointer to cabac structure
 *
 * @param[in] term_bin
 *  Symbol value, end of slice or not, term_bin is binary; when it is 1 the
 *  coder is flushed through isvce_cabac_flush() after the bin is coded
 *
 * @returns none
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void isvce_cabac_encode_terminate(isvce_cabac_ctxt_t *ps_cabac, WORD32 term_bin)
{
    encoding_envirnoment_t *ps_env = &(ps_cabac->s_cab_enc_env);
    UWORD32 u4_range = ps_env->u4_code_int_range;
    UWORD32 u4_low = ps_env->u4_code_int_low;
    /* term_bin = 1 has lps range = 2 */
    const UWORD32 u4_term_rlps = 2;
    WORD32 i4_renorm_shift;

    /* Sanity checks */
    ASSERT((u4_range >= 256) && (u4_range < 512));
    ASSERT((term_bin == 0) || (term_bin == 1));

    u4_range -= u4_term_rlps;

    /* if terminate L is incremented by curR and R=2 */
    if(term_bin)
    {
        /* lps path; L = L + R; R = RLPS */
        u4_low += u4_range;
        u4_range = u4_term_rlps;
    }

    /* Renormalization: shift range back up to 9 significant bits and shift */
    /* low by the same amount                                               */
    GETRANGE(i4_renorm_shift, u4_range);
    i4_renorm_shift = 9 - i4_renorm_shift;
    u4_low <<= i4_renorm_shift;
    u4_range <<= i4_renorm_shift;

    /* bits to be inserted in the bitstream */
    ps_env->u4_bits_gen += i4_renorm_shift;
    ps_env->u4_code_int_range = u4_range;
    ps_env->u4_code_int_low = u4_low;

    /* generate stream when a byte is ready */
    if(ps_env->u4_bits_gen > CABAC_BITS)
    {
        isvce_cabac_put_byte(ps_cabac);
    }

    /* a coded terminate bin of 1 is followed by the flush procedure */
    if(term_bin)
    {
        isvce_cabac_flush(ps_cabac);
    }
}
/**
 *******************************************************************************
 * @brief
 *  Bypass encoding process for binary decisions: Explained (9.3.4.4
 *  :ITU_T_H264-201402), flowchart 9-10.
 *
 * @param[inout] ps_cabac
 *  pointer to cabac context (handle)
 *
 * @param[in] bin
 *  bypass bin(0/1) to be encoded
 *
 * @returns none
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void isvce_cabac_encode_bypass_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin)
{
    encoding_envirnoment_t *ps_env = &(ps_cabac->s_cab_enc_env);
    UWORD32 u4_range = ps_env->u4_code_int_range;
    UWORD32 u4_low = ps_env->u4_code_int_low;

    /* Sanity checks */
    ASSERT((u4_range >= 256) && (u4_range < 512));
    ASSERT((bin == 0) || (bin == 1));

    /* L = 2 * L, plus R when the bin is 1; range itself is left unchanged */
    u4_low = (u4_low << 1) + (bin ? u4_range : 0);

    /* 1 bit to be inserted in the bitstream */
    ps_env->u4_bits_gen++;
    ps_env->u4_code_int_low = u4_low;

    /* generate stream when a byte is ready */
    if(ps_env->u4_bits_gen > CABAC_BITS)
    {
        isvce_cabac_put_byte(ps_cabac);
    }
}
/**
 ******************************************************************************
 *
 * @brief Appends up to 8 bypass bins to low in a single step
 *
 * @par Description
 *  L = (L << i4_num_bits) + (R * i4_value); the generated bit count grows by
 *  i4_num_bits and a byte is put into the stream when one is ready
 *
 ******************************************************************************
 */
static void isvce_cabac_append_bypass_chunk(isvce_cabac_ctxt_t *ps_cabac, UWORD32 u4_range,
                                            WORD32 i4_value, WORD32 i4_num_bits)
{
    encoding_envirnoment_t *ps_env = &(ps_cabac->s_cab_enc_env);

    ps_env->u4_code_int_low <<= i4_num_bits;
    ps_env->u4_code_int_low += (i4_value * u4_range);
    ps_env->u4_bits_gen += i4_num_bits;

    if(ps_env->u4_bits_gen > CABAC_BITS)
    {
        /* insert the leading byte of low into stream */
        isvce_cabac_put_byte(ps_cabac);
    }
}

/**
 ******************************************************************************
 *
 * @brief Encodes a series of bypass bins (FLC bypass bins)
 *
 * @par Description
 *  This function is more optimal than calling isvce_cabac_encode_bypass_bin()
 *  in a loop as cabac low, renorm and generating the stream (8bins at a time)
 *  can be done in one operation
 *
 * @param[inout] ps_cabac
 *  pointer to cabac context (handle)
 *
 * @param[in] u4_bins
 *  syntax element to be coded (as FLC bins); the most significant of the
 *  num_bins bits is coded first
 *
 * @param[in] num_bins
 *  This is the FLC length for u4_bins (1 to 32)
 *
 * @return none
 *
 ******************************************************************************
 */
void isvce_cabac_encode_bypass_bins(isvce_cabac_ctxt_t *ps_cabac, UWORD32 u4_bins, WORD32 num_bins)
{
    UWORD32 u4_range = ps_cabac->s_cab_enc_env.u4_code_int_range;
    WORD32 i4_chunk;

    /* Sanity checks */
    ASSERT((num_bins < 33) && (num_bins > 0));
    ASSERT((u4_range >= 256) && (u4_range < 512));

    /* Encode 8 bins at a time, most significant group first */
    while(num_bins > 8)
    {
        num_bins -= 8;
        i4_chunk = (u4_bins >> (num_bins)) & 0xff;
        isvce_cabac_append_bypass_chunk(ps_cabac, u4_range, i4_chunk, 8);
    }

    /* Update low with the remaining bins */
    i4_chunk = (u4_bins & ((1 << num_bins) - 1));
    isvce_cabac_append_bypass_chunk(ps_cabac, u4_range, i4_chunk, num_bins);
}

380
encoder/svc/isvce_cabac.h Normal file
View file

@ -0,0 +1,380 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_cabac.h
*
* @brief
* This file contains cabac related macros, enums, tables and function
*declarations.
*
* @author
* Doney Alex
*
* @remarks
* none
*
*******************************************************************************
*/
#ifndef _ISVCE_CABAC_H_
#define _ISVCE_CABAC_H_
#include "ih264e_cabac.h"
#include "isvce_cabac_structs.h"
#include "isvce_defs.h"
#include "isvce_structs.h"
/*****************************************************************************/
/* Function Declarations */
/*****************************************************************************/
/**
*******************************************************************************
*
* @brief
* Initialize default context values and pointers.
*
* @param[in] ps_ent_ctxt
* Pointer to entropy context structure
*
* @returns
*
* @remarks
* None
*
*******************************************************************************
*/
void isvce_init_cabac_table(isvce_entropy_ctxt_t *ps_ent_ctxt);
/**
*******************************************************************************
*
* @brief
* Initialize cabac context: Initialize all contexts with the init values given
* in the spec. Called at the beginning of entropy coding of each slice for
* CABAC encoding.
*
* @param[in] ps_ent_ctxt
* Pointer to entropy context structure
*
* @param[in] ps_slice_hdr
* Pointer to a slice header (presumably that of the slice about to be coded;
* confirm against the definition)
*
* @returns
*
* @remarks
* None
*
*******************************************************************************
*/
extern void isvce_init_cabac_ctxt(isvce_entropy_ctxt_t *ps_ent_ctxt, slice_header_t *ps_slice_hdr);
/**
*******************************************************************************
*
* @brief
* k-th order Exp-Golomb (UEGk) binarization process: Implements concatenated
* unary/ k-th order Exp-Golomb (UEGk) binarization process,
* where k = 0 as defined in 9.3.2.3 of ITU_T_H264-201402
*
* @param[in] i2_sufs
* Suffix bit string
*
* @param[out] pi1_bins_len
* Pointer to length of the string
*
* @returns Binarized value
*
* @remarks
* None
*
*******************************************************************************
*/
UWORD32 isvce_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len);
/**
*******************************************************************************
*
* @brief
* Get cabac context for the MB :calculates the pointers to Top and left
* cabac neighbor context depending upon neighbor availability.
*
* @param[in] ps_ent_ctxt
* Pointer to entropy context structure
*
* @param[in] u4_mb_type
* Type of MB
*
* @returns
*
* @remarks
* None
*
*******************************************************************************
*/
void isvce_get_cabac_context(isvce_entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type);
/**
*******************************************************************************
* @brief
* flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402).
*
* @param[in] ps_cabac_ctxt
* pointer to cabac context (handle)
*
* @returns none
*
* @remarks
* None
*
*******************************************************************************
*/
void isvce_cabac_flush(isvce_cabac_ctxt_t *ps_cabac_ctxt);
/**
******************************************************************************
*
* @brief Puts new byte (and outstanding bytes) into bitstream after cabac
* renormalization
*
* @par Description
* 1. Extract the leading byte of low(L)
* 2. If leading byte=0xff increment outstanding bytes and return
* (as the actual bits depend on carry propagation later)
* 3. If leading byte is not 0xff check for any carry propagation
* 4. Insert the carry (propagated in previous byte) along with outstanding
* bytes (if any) and leading byte
*
*
* @param[inout] ps_cabac_ctxt
* pointer to cabac context (handle)
*
* @return
*
******************************************************************************
*/
void isvce_cabac_put_byte(isvce_cabac_ctxt_t *ps_cabac_ctxt);
/**
******************************************************************************
*
* @brief Codes a bin based on probability and mps packed context model
*
* @par Description
* 1. Apart from encoding bin, context model is updated as per state transition
* 2. Range and Low renormalization is done based on bin and original state
* 3. After renorm bitstream is updated (if required)
*
* @param[inout] ps_cabac
* pointer to cabac context (handle)
*
* @param[in] bin
* bin(boolean) to be encoded
*
* @param[in] pu1_bin_ctxts
* index of cabac context model containing pState[bits 5-0] | MPS[bit6]
*
* @return
*
******************************************************************************
*/
void isvce_cabac_encode_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin,
bin_ctxt_model *pu1_bin_ctxts);
/**
*******************************************************************************
*
* @brief
* Encoding process for a binary decision: implements the encoding process of a
* decision as defined in 9.3.4.2. This function encodes multiple bins of a
* symbol. Implements flowchart Figure 9-7 (ITU_T_H264-201402)
*
* @param[in] u4_bins
* array of bin values
*
* @param[in] i1_bins_len
* Length of bins, maximum 32
*
* @param[in] u4_ctx_inc
* CtxInc, byte0- bin0, byte1-bin1 ..
*
* @param[in] i1_valid_len
* valid length of bins, after that CtxInc is constant
*
* @param[in] pu1_bin_ctxt_type
* Pointer to binary contexts
* @param[in] ps_cabac
* Pointer to cabac_context_structure
*
* @returns
*
* @remarks
* None
*
*******************************************************************************
*/
void isvce_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len, UWORD32 u4_ctx_inc,
WORD8 i1_valid_len, bin_ctxt_model *pu1_bin_ctxt_type,
isvce_cabac_ctxt_t *ps_cabac);
/**
*******************************************************************************
* @brief
* Encoding process for a binary decision before termination:Encoding process
* of a termination(9.3.4.5 :ITU_T_H264-201402) . Explained in flowchart 9-11.
*
* @param[in] ps_cabac
* Pointer to cabac structure
*
* @param[in] term_bin
* Symbol value, end of slice or not, term_bin is binary
*
* @returns
*
* @remarks
* None
*
*******************************************************************************
*/
void isvce_cabac_encode_terminate(isvce_cabac_ctxt_t *ps_cabac, WORD32 term_bin);
/**
*******************************************************************************
* @brief
* Bypass encoding process for binary decisions: Explained (9.3.4.4
*:ITU_T_H264-201402) , flowchart 9-10.
*
* @param[in] ps_cabac : pointer to cabac context (handle)
*
* @param[in] bin : bypass bin(0/1) to be encoded
*
* @returns
*
* @remarks
* None
*
*******************************************************************************
*/
void isvce_cabac_encode_bypass_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin);
/**
******************************************************************************
*
* @brief Encodes a series of bypass bins (FLC bypass bins)
*
* @par Description
* This function is more optimal than calling isvce_cabac_encode_bypass_bin()
* in a loop as cabac low, renorm and generating the stream (8bins at a time)
* can be done in one operation
*
* @param[inout]ps_cabac
* pointer to cabac context (handle)
*
* @param[in] u4_bins
* syntax element to be coded (as FLC bins)
*
* @param[in] num_bins
* FLC length (number of bins) of u4_bins
*
* @return
*
******************************************************************************
*/
void isvce_cabac_encode_bypass_bins(isvce_cabac_ctxt_t *ps_cabac, UWORD32 u4_bins, WORD32 num_bins);
/**
*******************************************************************************
*
* @brief
* This function generates CABAC coded bit stream for an Intra Slice.
*
* @description
* The mb syntax layer for intra slices constitutes luma mb mode, luma sub
*modes (if present), mb qp delta, coded block pattern, chroma mb mode and
* luma/chroma residue. These syntax elements are written as directed by table
* 7.3.5 of h264 specification.
*
* @param[in] ps_ent_ctxt
* pointer to entropy context
*
* @returns error code
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_write_islice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
/**
*******************************************************************************
*
* @brief
* This function generates CABAC coded bit stream for Inter slices
*
* @description
* The mb syntax layer for inter slices constitutes luma mb mode, luma sub
*modes (if present), mb qp delta, coded block pattern, chroma mb mode and
* luma/chroma residue. These syntax elements are written as directed by table
* 7.3.5 of h264 specification
*
* @param[in] ps_ent_ctxt
* pointer to entropy context
*
* @returns error code
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_write_pslice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
/**
*******************************************************************************
*
* @brief
* This function generates CABAC coded bit stream for B slices
*
* @description
* The mb syntax layer for inter slices constitutes luma mb mode,
* mb qp delta, coded block pattern, chroma mb mode and
* luma/chroma residue. These syntax elements are written as directed by table
* 7.3.5 of h264 specification
*
* @param[in] ps_ent_ctxt
* pointer to entropy context
*
* @returns error code
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_write_bslice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
#if ENABLE_RE_ENC_AS_SKIP
IH264E_ERROR_T isvce_reencode_as_skip_frame_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
#endif
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,215 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_cabac_init.c
*
* @brief
* Contains all initialization functions for cabac contexts
*
* @author
* Doney Alex
*
* @par List of Functions:
*
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System include files */
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <assert.h>
/* User include files */
#include "ih264_typedefs.h"
#include "iv2.h"
#include "ive2.h"
#include "isvc_defs.h"
#include "ih264_debug.h"
#include "ime_distortion_metrics.h"
#include "ime_defs.h"
#include "ime_structs.h"
#include "ih264_error.h"
#include "isvc_structs.h"
#include "isvc_trans_quant_itrans_iquant.h"
#include "isvc_inter_pred_filters.h"
#include "isvc_mem_fns.h"
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "ih264_platform_macros.h"
#include "isvc_macros.h"
#include "ih264_buf_mgr.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "isvc_common_tables.h"
#include "isvc_cabac_tables.h"
#include "ih264_list.h"
#include "isvce_defs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "isvce_rate_control.h"
#include "isvce_cabac_structs.h"
#include "isvce_structs.h"
#include "isvce_cabac.h"
#include "isvce_process.h"
#include "ithread.h"
#include "isvce_encode_header.h"
#include "isvce_globals.h"
#include "ih264e_config.h"
#include "ih264e_trace.h"
#include "ih264e_statistics.h"
#include "ih264_cavlc_tables.h"
#include "isvce_deblk.h"
#include "isvce_me.h"
#include "ih264e_debug.h"
#include "ih264e_master.h"
#include "isvce_utils.h"
#include "irc_mem_req_and_acq.h"
#include "irc_rate_control_api.h"
#include "ih264e_platform_macros.h"
#include "ime_statistics.h"
/*****************************************************************************/
/* Function definitions . */
/*****************************************************************************/
/**
*******************************************************************************
*
* @brief
* Initialize cabac encoding environment
*
* @param[in] ps_cab_enc_env
* Pointer to encoding_envirnoment_t structure
*
* @returns
*
* @remarks
* None
*
*******************************************************************************
*/
static void isvce_init_cabac_enc_envirnoment(encoding_envirnoment_t *ps_cab_enc_env)
{
    /* Nothing has been produced yet: no generated bits, no outstanding bytes */
    ps_cab_enc_env->u4_bits_gen = 0;
    ps_cab_enc_env->u4_out_standing_bytes = 0;

    /* Coding interval starts with low = 0 and range = 0x1fe (510) */
    ps_cab_enc_env->u4_code_int_range = 0x1fe;
    ps_cab_enc_env->u4_code_int_low = 0;
}
/**
*******************************************************************************
*
* @brief
* Initialize default context values and pointers (Called once at the beginning
*of encoding).
*
* @param[in] ps_ent_ctxt
* Pointer to entropy context structure
*
* @returns
*
* @remarks
* None
*
*******************************************************************************
*/
void isvce_init_cabac_table(isvce_entropy_ctxt_t *ps_ent_ctxt)
{
    isvce_cabac_ctxt_t *ps_cabac = ps_ent_ctxt->ps_cabac;

    /* Entry 0 of the MB map always holds the default values that stand in */
    /* for a neighbouring MB that is not available */
    isvce_mb_info_ctxt_t *ps_unavail_mb = ps_cabac->ps_mb_map_ctxt_inc_base;

    /* Wire up the pointers held by the CABAC context */
    ps_cabac->ps_bitstrm = ps_ent_ctxt->ps_bitstrm;
    ps_cabac->ps_lft_csbp = &ps_cabac->s_lft_csbp;
    /* The working map starts one entry past the default entry */
    ps_cabac->ps_mb_map_ctxt_inc = ps_unavail_mb + 1;
    ps_cabac->ps_def_ctxt_mb_info = ps_unavail_mb;

    /* Fill in the default ("MB not available") entry */
    memset(ps_unavail_mb->u1_mv, 0, sizeof(ps_unavail_mb->u1_mv));
    memset(ps_unavail_mb->i1_ref_idx, 0, sizeof(ps_unavail_mb->i1_ref_idx));
    ps_unavail_mb->u1_base_mode_flag = 0;
    ps_unavail_mb->u1_intrapred_chroma_mode = 0;
    ps_unavail_mb->u1_cbp = 0x0f;
    ps_unavail_mb->u1_mb_type = CAB_SKIP;
}
/**
*******************************************************************************
*
* @brief
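* Initialize cabac context: Initialize all contexts with the init values given
* in the spec. Called at the beginning of entropy coding of each slice for
* CABAC encoding.
*
* @param[in] ps_ent_ctxt
* Pointer to entropy context structure
*
* @param[in] ps_slice_hdr
* Pointer to the slice header; supplies the slice type, cabac_init_idc and
* slice QP used to select the initial context table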
*
* @returns
*
* @remarks
* None
*
*******************************************************************************
*/
void isvce_init_cabac_ctxt(isvce_entropy_ctxt_t *ps_ent_ctxt, slice_header_t *ps_slice_hdr)
{
    isvce_cabac_ctxt_t *ps_cabac = ps_ent_ctxt->ps_cabac;
    const UWORD8 u1_slice_type = ps_slice_hdr->u1_slice_type;
    const UWORD8 u1_slice_qp = ps_slice_hdr->i1_slice_qp;

    /* I slices always use init table set 3; every other slice type uses */
    /* the set selected by the slice header's cabac_init_idc */
    const WORD8 i1_table_set = (ISLICE == u1_slice_type) ? 3 : ps_slice_hdr->i1_cabac_init_idc;

    /* Restart the arithmetic coder state for this slice */
    isvce_init_cabac_enc_envirnoment(&ps_cabac->s_cab_enc_env);
    ps_cabac->i1_prevps_mb_qp_delta_ctxt = 0;

    /* Load the context models for the chosen table set and slice QP */
    memcpy(ps_cabac->au1_cabac_ctxt_table,
           gau1_isvc_cabac_ctxt_init_table[i1_table_set][u1_slice_qp],
           NUM_SVC_CABAC_CTXTS * sizeof(bin_ctxt_model));
}

View file

@ -0,0 +1,142 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_cabac_structs.h
*
* @brief
* This file contains cabac related structure definitions.
*
* @author
* Doney Alex
*
* @remarks
* none
*
*******************************************************************************
*/
#ifndef _ISVCE_CABAC_STRUCTS_H_
#define _ISVCE_CABAC_STRUCTS_H_
#include "ih264_typedefs.h"
#include "isvc_cabac_tables.h"
#include "ih264e_bitstream.h"
#include "ih264e_cabac_structs.h"
/**
******************************************************************************
* @brief MB info for cabac
******************************************************************************
*/
typedef struct isvce_mb_info_ctxt_t
{
/* Per-MB information kept so that later MBs can derive their CABAC context */
/* increments (CtxtInc) from their neighbours */
UWORD8 u1_mb_type; /* !< macroblock type: I/P/B/SI/SP */
UWORD8 u1_cbp; /* !< Coded Block Pattern */
/* Intra chroma prediction mode of the MB */
UWORD8 u1_intrapred_chroma_mode;
/*************************************************************************/
/* Arrangement of AC CSBP */
/* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
/* CSBP: V1 V0 U1 U0 Y3 Y2 Y1 Y0 */
/*************************************************************************/
UWORD8 u1_yuv_ac_csbp;
/*************************************************************************/
/* Arrangement of DC CSBP */
/* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
/* CSBP: x x x x x Vdc Udc Ydc */
/*************************************************************************/
UWORD8 u1_yuv_dc_csbp;
/* Reference index information, 4 entries */
/* NOTE(review): presumably laid out like the left ref_idx contexts */
/* (idx 0,1 forward, idx 2,3 backward) - confirm against the writer */
WORD8 i1_ref_idx[4];
/* Motion vector information, 4 x 4 bytes */
/* NOTE(review): meaning of the two dimensions is not visible here - confirm */
UWORD8 u1_mv[4][4];
/* base_mode_flag of the MB; the left and top neighbours' values are read */
/* when the context increment for base_mode_flag is derived */
UWORD8 u1_base_mode_flag;
} isvce_mb_info_ctxt_t;
/**
******************************************************************************
* @brief CABAC Context structure : Variables to handle Cabac
******************************************************************************
*/
typedef struct isvce_cabac_ctxt_t
{
/* Table holding all the cabac context models (NUM_SVC_CABAC_CTXTS entries) */
bin_ctxt_model au1_cabac_ctxt_table[NUM_SVC_CABAC_CTXTS];
/* Storage for the left csbp information; ps_lft_csbp points here */
cab_csbp_t s_lft_csbp;
/**
* pointer to Bitstream structure
*/
bitstrm_t *ps_bitstrm;
/* Pointer to the base of the mb_info_ctxt_t map; entry 0 of the map holds */
/* the default values used for an MB that is not available */
isvce_mb_info_ctxt_t *ps_mb_map_ctxt_inc_base;
/* CABAC encoding environment (low, range, outstanding bytes, bits */
/* generated), embedded by value */
encoding_envirnoment_t s_cab_enc_env;
/* These things need to be updated at each MbLevel */
/* Prev ps_mb_qp_delta_ctxt */
WORD8 i1_prevps_mb_qp_delta_ctxt;
/* Pointer to mb_info_ctxt_t map (one entry past ps_mb_map_ctxt_inc_base) */
isvce_mb_info_ctxt_t *ps_mb_map_ctxt_inc;
/* Pointer to default mb_info_ctxt_t */
isvce_mb_info_ctxt_t *ps_def_ctxt_mb_info;
/* Pointer to current mb_info_ctxt_t */
isvce_mb_info_ctxt_t *ps_curr_ctxt_mb_info;
/* Pointer to left mb_info_ctxt_t */
isvce_mb_info_ctxt_t *ps_left_ctxt_mb_info;
/* Pointer to top mb_info_ctxt_t */
isvce_mb_info_ctxt_t *ps_top_ctxt_mb_info;
/* Pointer to left csbp structure */
cab_csbp_t *ps_lft_csbp;
/* Pointers to the left MB's luma AC, chroma AC and DC csbp bytes */
/* NOTE(review): presumably point into the left csbp structure - confirm */
UWORD8 *pu1_left_y_ac_csbp;
UWORD8 *pu1_left_uv_ac_csbp;
UWORD8 *pu1_left_yuv_dc_csbp;
/***************************************************************************/
/* Ref_idx contexts are stored in the following way */
/* Array Idx 0,1 for reference indices in Forward direction */
/* Array Idx 2,3 for reference indices in backward direction */
/***************************************************************************/
/* Dimensions for i1_left_ref_idx_ctx_inc_arr is [2][4] for Mbaff:Top and Bot */
WORD8 i1_left_ref_idx_ctx_inc_arr[2][4];
/* Pointer to a row of 4 left ref_idx context increments */
WORD8 *pi1_left_ref_idx_ctxt_inc;
/* Dimensions for u1_left_mv_ctxt_inc_arr is [2][4][4] for Mbaff case */
UWORD8 u1_left_mv_ctxt_inc_arr[2][4][4];
/* Pointer to rows of 4 left mv context increments */
UWORD8 (*pu1_left_mv_ctxt_inc)[4];
} isvce_cabac_ctxt_t;
#endif

View file

@ -0,0 +1,88 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_cabac_utils.h
*
* @brief
* Contains inline helper functions that CABAC-encode the SVC specific
* macroblock flags (base mode, residual prediction and motion prediction)
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_CABAC_UTILS_H_
#define _ISVCE_CABAC_UTILS_H_
#include "ih264_typedefs.h"
#include "isvc_macros.h"
#include "isvc_defs.h"
#include "isvc_cabac_tables.h"
#include "isvce_cabac_structs.h"
#include "isvce_cabac.h"
/* CABAC-encodes base_mode_flag using a context chosen from the left and top MBs */
static FORCEINLINE void isvce_cabac_enc_base_mode_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt,
                                                       UWORD8 u1_base_mode_flag)
{
    /* Each neighbour (left, top) whose base_mode_flag is 0 adds 1 to the */
    /* context increment, giving a value in the range 0..2 */
    const UWORD8 u1_left_term = !ps_cabac_ctxt->ps_left_ctxt_mb_info->u1_base_mode_flag;
    const UWORD8 u1_top_term = !ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_base_mode_flag;
    const UWORD8 u1_ctx_inc = u1_left_term + u1_top_term;
    const UWORD32 u4_ctx_idx = BASE_MODE_FLAG + u1_ctx_inc;

    isvce_cabac_encode_bin(ps_cabac_ctxt, u1_base_mode_flag,
                           &ps_cabac_ctxt->au1_cabac_ctxt_table[u4_ctx_idx]);
}
/* CABAC-encodes residual_prediction_flag; the context depends on base_mode_flag */
static FORCEINLINE void isvce_cabac_enc_residual_prediction_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt,
                                                                 UWORD8 u1_base_mode_flag,
                                                                 UWORD8 u1_residual_prediction_flag)
{
    /* Context increment is 1 when base_mode_flag is 0, and 0 otherwise */
    const UWORD8 u1_ctx_inc = u1_base_mode_flag ? 0 : 1;
    const UWORD32 u4_ctx_idx = RESIDUAL_PREDICTION_FLAG + u1_ctx_inc;

    isvce_cabac_encode_bin(ps_cabac_ctxt, u1_residual_prediction_flag,
                           &ps_cabac_ctxt->au1_cabac_ctxt_table[u4_ctx_idx]);
}
/* CABAC-encodes a motion_prediction_flag with the context of the given list */
static FORCEINLINE void isvce_cabac_enc_motion_prediction_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt,
                                                               UWORD8 u1_motion_prediction_flag,
                                                               UWORD8 u1_is_l0_mvp)
{
    UWORD32 u4_ctx_idx;

    /* A single context per list: L0 when u1_is_l0_mvp is non-zero, else L1 */
    if(u1_is_l0_mvp)
    {
        u4_ctx_idx = MOTION_PREDICTION_FLAG_L0;
    }
    else
    {
        u4_ctx_idx = MOTION_PREDICTION_FLAG_L1;
    }

    isvce_cabac_encode_bin(ps_cabac_ctxt, u1_motion_prediction_flag,
                           &ps_cabac_ctxt->au1_cabac_ctxt_table[u4_ctx_idx]);
}
#endif

2021
encoder/svc/isvce_cavlc.c Normal file

File diff suppressed because it is too large Load diff

126
encoder/svc/isvce_cavlc.h Normal file
View file

@ -0,0 +1,126 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file
* isvce_cavlc.h
*
* @brief
* This file contains declarations of the functions that write the CAVLC coded
* macroblock syntax of I, P and B slices
*
* @author
* ittiam
*
* @remarks
* none
******************************************************************************
*/
#ifndef _ISVCE_CAVLC_H_
#define _ISVCE_CAVLC_H_
#include "ih264_typedefs.h"
#include "isvce_defs.h"
#include "isvce_structs.h"
/*****************************************************************************/
/* Function macro definitions */
/*****************************************************************************/
/*****************************************************************************/
/* Extern Function Declarations */
/*****************************************************************************/
/**
*******************************************************************************
*
* @brief
* This function generates CAVLC coded bit stream for an Intra Slice.
*
* @description
* The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes
* (if present), mb qp delta, coded block pattern, chroma mb mode and
* luma/chroma residue. These syntax elements are written as directed by table
* 7.3.5 of h264 specification.
*
* @param[in] ps_ent_ctxt
* pointer to entropy context
*
* @returns error code
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_write_islice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt);
/**
*******************************************************************************
*
* @brief
* This function generates CAVLC coded bit stream for Inter slices
*
* @description
* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
* (if present), mb qp delta, coded block pattern, chroma mb mode and
* luma/chroma residue. These syntax elements are written as directed by table
* 7.3.5 of h264 specification
*
* @param[in] ps_ent_ctxt
* pointer to entropy context
*
* @returns error code
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_write_pslice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt);
/**
*******************************************************************************
*
* @brief
* This function generates CAVLC coded bit stream for Inter(B) slices
*
* @description
* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
* (if present), mb qp delta, coded block pattern, chroma mb mode and
* luma/chroma residue. These syntax elements are written as directed by table
* 7.3.5 of h264 specification
*
* @param[in] ps_ent_ctxt
* pointer to entropy context
*
* @returns error code
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_write_bslice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt);
#if ENABLE_RE_ENC_AS_SKIP
IH264E_ERROR_T isvce_reencode_as_skip_frame_cavlc(isvce_entropy_ctxt_t *ps_entropy);
#endif
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,125 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file
* isvce_core_coding.h
*
* @brief
* This file contains extern declarations of core coding routines
*
* @author
* ittiam
*
* @remarks
* none
******************************************************************************
*/
#ifndef _ISVCE_CORE_CODING_H_
#define _ISVCE_CORE_CODING_H_
#include "isvce_structs.h"
/*****************************************************************************/
/* Constant Macros */
/*****************************************************************************/
/**
******************************************************************************
* @brief Enable/Disable Hadamard transform of DC Coeff's
******************************************************************************
*/
#define DISABLE_DC_TRANSFORM 0
#define ENABLE_DC_TRANSFORM 1
/**
*******************************************************************************
* @brief bit masks for DC and AC control flags
*******************************************************************************
*/
/* Block counts and 4x4 block dimensions within a macroblock */
#define DC_COEFF_CNT_LUMA_MB 16
#define NUM_4X4_BLKS_LUMA_MB_ROW 4
#define NUM_LUMA4x4_BLOCKS_IN_MB 16
#define NUM_CHROMA4x4_BLOCKS_IN_MB 8
#define SIZE_4X4_BLK_HRZ TRANS_SIZE_4
#define SIZE_4X4_BLK_VERT TRANS_SIZE_4
/* Luma control flags: DC in bits 15..0, AC in bits 31..16 */
#define CNTRL_FLAG_DC_MASK_LUMA 0x0000FFFF
#define CNTRL_FLAG_AC_MASK_LUMA 0xFFFF0000
/* Chroma control flags: AC in bits 31..28 (U) and 27..24 (V), */
/* DC in bits 15..12 (U) and 11..8 (V) */
/* NOTE(review): these overlap the luma masks, so luma and chroma flags are */
/* presumably kept in separate control words - confirm against the users */
#define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000
#define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000
#define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000
#define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00
/* Combined U and V masks: AC = 0xFF000000, DC = 0x0000FF00 */
#define CNTRL_FLAG_AC_MASK_CHROMA (CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V)
#define CNTRL_FLAG_DC_MASK_CHROMA (CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V)
/* Bits 15..14, i.e. the two most significant bits of the chroma U DC mask */
#define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000
/**
*******************************************************************************
* @brief macros for transforms
*******************************************************************************
*/
/*
 * Removes the most significant set bit from u4_cntrl: blk_lin_id receives
 * CLZ(u4_cntrl) and every bit from bit 31 down to bit (31 - blk_lin_id) is
 * then cleared in u4_cntrl. Both arguments are written, so both must be
 * lvalues, and each is evaluated more than once.
 * NOTE(review): assumes CLZ() returns the number of leading zero bits of a
 * 32-bit word. If it can return 32 (u4_cntrl == 0) the shift below is by the
 * full width of the constant - confirm callers only pass a non-zero u4_cntrl.
 */
#define DEQUEUE_BLKID_FROM_CONTROL(u4_cntrl, blk_lin_id) \
{ \
blk_lin_id = CLZ(u4_cntrl); \
u4_cntrl &= (0x7FFFFFFF >> blk_lin_id); \
};
/*
 * Converts a linear 4x4 block index within a luma MB (raster order, 4 blocks
 * per row) into offsets: i4_offset_x = 4 * (u4_blk_id % 4) and
 * i4_offset_y = 4 * (u4_blk_id / 4).
 * i4_offset_x and i4_offset_y must be lvalues; u4_blk_id is evaluated twice,
 * so it must not have side effects. Every argument is parenthesised so that
 * an expression such as `a + b` can be passed as the block index.
 */
#define IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y) \
    {                                                        \
        (i4_offset_x) = ((u4_blk_id) % 4) << 2;              \
        (i4_offset_y) = ((u4_blk_id) / 4) << 2;              \
    }
/* Non-zero for chroma block indices above 3 */
#define IS_V_BLK(u4_blk_id) ((u4_blk_id) > 3)
/*
 * Converts a linear chroma 4x4 block index into offsets:
 * i4_offset_x = 8 * (bit 0 of u4_blk_id) + 1 for blocks with index > 3,
 * i4_offset_y = 4 * (bit 1 of u4_blk_id).
 * NOTE(review): the step of 8 and the +1 for V blocks presumably reflect an
 * interleaved U/V sample layout - confirm against the chroma coding routines.
 * i4_offset_x and i4_offset_y must be lvalues; u4_blk_id is evaluated more
 * than once, so it must not have side effects. Every argument is
 * parenthesised so that an expression such as `a | b` can be passed as the
 * block index.
 */
#define IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y)            \
    {                                                                     \
        (i4_offset_x) = (((u4_blk_id) & 0x1) << 3) + IS_V_BLK(u4_blk_id); \
        (i4_offset_y) = ((u4_blk_id) & 0x2) << 1;                         \
    }
/* Typedefs */
/*****************************************************************************/
/* Function Declarations */
/*****************************************************************************/
extern FT_CORE_CODING isvce_code_luma_intra_macroblock_16x16;
extern FT_CORE_CODING isvce_code_luma_intra_macroblock_4x4;
extern FT_CORE_CODING isvce_code_luma_intra_macroblock_4x4_rdopt_on;
extern FT_CORE_CODING isvce_code_chroma_intra_macroblock_8x8;
extern FT_CORE_CODING isvce_code_luma_inter_macroblock_16x16;
extern FT_CORE_CODING isvce_code_chroma_inter_macroblock_8x8;
#endif

1267
encoder/svc/isvce_deblk.c Normal file

File diff suppressed because it is too large Load diff

53
encoder/svc/isvce_deblk.h Normal file
View file

@ -0,0 +1,53 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file
* isvce_deblk.h
*
* @brief
* This file contains extern declarations of deblocking routines
*
* @author
* ittiam
*
* @remarks
* none
******************************************************************************
*/
#ifndef _ISVCE_DEBLK_H_
#define _ISVCE_DEBLK_H_
#include "ih264_typedefs.h"
#include "isvce_structs.h"
#define CSBP_LEFT_BLOCK_MASK 0x1111
#define CSBP_RIGHT_BLOCK_MASK 0x8888
#define NUM_EDGES_IN_MB 4
extern void isvce_compute_bs(isvce_process_ctxt_t *ps_proc, UWORD8 u1_inter_layer_deblk_flag);
extern void isvce_deblock_mb(isvce_process_ctxt_t *ps_proc, isvce_deblk_ctxt_t *ps_deblk,
UWORD8 u1_inter_layer_deblk_flag);
#endif

345
encoder/svc/isvce_defs.h Normal file
View file

@ -0,0 +1,345 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_defs.h
*
* @brief
* Definitions used in the encoder
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_DEFS_H_
#define _ISVCE_DEFS_H_
#include "ih264e_defs.h"
/* Compile-time limits and feature switches of the SVC encoder */
#define SVC_MAX_NUM_BFRAMES 0
#define DEFAULT_INIT_QP 1
/* Evaluates to 2 while SVC_MAX_NUM_BFRAMES is 0 */
#define SVC_MAX_NUM_INP_FRAMES ((SVC_MAX_NUM_BFRAMES) + 2)
#define LOG2_MAX_FRAME_NUM_MINUS4 12
/* (MB_SIZE / ENC_MIN_PU_SIZE) squared */
#define ENC_MAX_PU_IN_MB ((MB_SIZE / ENC_MIN_PU_SIZE) * (MB_SIZE / ENC_MIN_PU_SIZE))
#define MAX_REF_FRAMES_PER_PRED_DIR 1
#define SVC_MAX_SLICE_HDR_CNT 1
#define MAX_LAYER_REFERENCE_PICS 1
#define ENABLE_RESIDUAL_PREDICTION 1
/* USE_ILP_MV_IN_ME and USE_ILP_MV_AS_MVP can be non-zero only while */
/* ENABLE_ILP_MV is non-zero */
#define ENABLE_ILP_MV 1
#define USE_ILP_MV_IN_ME (1 && (ENABLE_ILP_MV))
#define USE_ILP_MV_AS_MVP (1 && (ENABLE_ILP_MV))
/* 1 when USE_ILP_MV_AS_MVP is non-zero, 0 otherwise */
#define MAX_MVP_IDX (USE_ILP_MV_AS_MVP ? 1 : 0)
#define ENABLE_IBL_MODE 1
/* Always evaluates to 0 because of the leading `0 &&` */
#define ENABLE_INTRA_BASE_DEBLOCK (0 && (ENABLE_IBL_MODE))
#define ENABLE_MODE_STAT_VISUALISER 0
#define FORCE_FAST_INTRA4X4 0
#define FORCE_DISTORTION_BASED_INTRA_4X4_GATING 1
#define ENABLE_INTRA16X16_BASED_INTRA4X4_GATING 0
#define ENABLE_ILP_BASED_INTRA4X4_GATING 0
#define DISABLE_POST_ENC_SKIP 1
/* Guards the declarations of isvce_reencode_as_skip_frame_cabac() and */
/* isvce_reencode_as_skip_frame_cavlc() */
#define ENABLE_RE_ENC_AS_SKIP 1
#define MAX_ILP_MV_IN_NBR_RGN 4
/* L, T, TL, TR, Zero, Skip, 'Temporal Skip', ILP */
#define MAX_FPEL_SEARCH_CANDIDATES (7 + MAX_PU_IN_MB + MAX_ILP_MV_IN_NBR_RGN)
#define NUM_SVCE_RC_MEMTABS 45
#define SVCE_MAX_INP_DIM 1920
/* 1920 x 1088 = 2088960 */
#define SVCE_MAX_INP_FRAME_SIZE (1920 * 1088)
/**
***************************************************************************
* Enum listing the memory records requested by the encoder. Each enumerator
* is the index of one record; ISVCE_MEM_REC_CNT is the total record count.
****************************************************************************
*/
typedef enum ISVCE_MEMREC_TYPES_T
{
/**
* Codec Object at API level
*/
ISVCE_MEM_REC_IV_OBJ,
/**
* Codec context
*/
ISVCE_MEM_REC_CODEC,
/**
* Cabac context
*/
ISVCE_MEM_REC_CABAC,
/**
* Cabac context_mb_info
*/
ISVCE_MEM_REC_CABAC_MB_INFO,
/**
* entropy context
*/
ISVCE_MEM_REC_ENTROPY,
/**
* Buffer to hold coeff data
*/
ISVCE_MEM_REC_MB_COEFF_DATA,
/**
* Presumably the buffer holding MB header data (going by the enumerator
* name; TODO confirm)
*/
ISVCE_MEM_REC_MB_HEADER_DATA,
/**
* Motion vector bank
*/
ISVCE_MEM_REC_MVBANK,
/**
* Motion vector bits
*/
ISVCE_MEM_REC_MVBITS,
/**
* Holds mem records passed to the codec.
*/
ISVCE_MEM_REC_BACKUP,
/**
* Holds SPS
*/
ISVCE_MEM_REC_SPS,
/**
* Holds PPS
*/
ISVCE_MEM_REC_PPS,
/**
* Holds SVC NALU Extension data
*/
ISVCE_MEM_REC_SVC_NALU_EXT,
/**
* Holds subset SPS data
*/
ISVCE_MEM_REC_SUBSET_SPS,
/**
* Holds Slice Headers
*/
ISVCE_MEM_REC_SLICE_HDR,
/**
* Holds SVC Slice Headers
*/
ISVCE_MEM_REC_SVC_SLICE_HDR,
/**
* Contains map indicating slice index per MB basis
*/
ISVCE_MEM_REC_SLICE_MAP,
/**
* Holds thread handles
*/
ISVCE_MEM_REC_THREAD_HANDLE,
/**
* Holds control call mutex
*/
ISVCE_MEM_REC_CTL_MUTEX,
/**
* Holds entropy call mutex
*/
ISVCE_MEM_REC_ENTROPY_MUTEX,
/**
* Holds memory for Process JOB Queue
*/
ISVCE_MEM_REC_PROC_JOBQ,
/**
* Holds memory for Entropy JOB Queue
*/
ISVCE_MEM_REC_ENTROPY_JOBQ,
/**
* Contains status map indicating processing status per MB basis
*/
ISVCE_MEM_REC_PROC_MAP,
/**
* Contains status map indicating deblocking status per MB basis
*/
ISVCE_MEM_REC_DBLK_MAP,
/**
* Contains AIR map and mask
*/
ISVCE_MEM_REC_AIR_MAP,
/**
* Contains status map indicating ME status per MB basis
*/
ISVCE_MEM_REC_ME_MAP,
/**
* Holds dpb manager context
*/
ISVCE_MEM_REC_DPB_MGR,
/**
* Holds intermediate buffers needed during processing stage
* Memory for process contexts is allocated in this memtab
*/
ISVCE_MEM_REC_PROC_SCRATCH,
/**
* Presumably holds quantization parameter data (going by the enumerator
* name; TODO confirm). The description used here earlier was a copy of the
* one on ISVCE_MEM_REC_BS_QP.
*/
ISVCE_MEM_REC_QUANT_PARAM,
/**
* Holds top row syntax information
*/
ISVCE_MEM_REC_TOP_ROW_SYN_INFO,
/**
* Holds buffers for vert_bs, horz_bs and QP (all frame level)
*/
ISVCE_MEM_REC_BS_QP,
/**
* Holds input buffer manager context
*/
ISVCE_MEM_REC_INP_PIC,
/**
* Holds output buffer manager context
*/
ISVCE_MEM_REC_OUT,
/**
* Holds picture buffer manager context and array of pic_buf_ts
* Also holds reference picture buffers in non-shared mode
*/
ISVCE_MEM_REC_REF_PIC,
/**
* Mem record for color space conversion
*/
ISVCE_MEM_REC_CSC,
/**
* NMB info struct
*/
ISVCE_MEM_REC_MB_INFO_NMB,
/**
* SVC Spatial layer Inputs
*/
ISVCE_MEM_SVC_SPAT_INP,
/**
* Downscaler memory records
*/
ISVCE_MEM_DOWN_SCALER,
/**
* SVC ILP data
*/
ISVCE_MEM_SVC_ILP_DATA,
/**
* SVC ILP MV Context
*/
ISVCE_MEM_SVC_ILP_MV_CTXT,
/**
* SVC ResPred Context
*/
ISVCE_MEM_SVC_RES_PRED_CTXT,
/**
* SVC inter-layer intra pred context
*/
ISVCE_MEM_SVC_INTRA_PRED_CTXT,
/**
* RC Utils Context
*/
ISVCE_MEM_SVC_RC_UTILS_CTXT,
/**
* SubPic RC Context
*/
ISVCE_MEM_SVC_SUB_PIC_RC_CTXT,
/**
* Mode stat visualiser buffer; this record exists only when
* ENABLE_MODE_STAT_VISUALISER is non-zero
*/
#if ENABLE_MODE_STAT_VISUALISER
ISVCE_MEM_MODE_STAT_VISUALISER_BUF,
#endif
/**
* First of the rate control memory records. NUM_SVCE_RC_MEMTABS indices
* are reserved starting at this value (see ISVCE_MEM_REC_CNT).
*/
ISVCE_MEM_REC_RC,
/**
* Place holder to compute number of memory records.
*/
ISVCE_MEM_REC_CNT = ISVCE_MEM_REC_RC + NUM_SVCE_RC_MEMTABS,
/*
* Do not add anything below
*/
} ISVCE_MEMREC_TYPES_T;
#endif

View file

@ -0,0 +1,537 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_downscaler.c
*
* @brief
* Contains downscaler functions required by the SVC encoder
*
* @author
* ittiam
*
* @par List of Functions:
* - isvce_get_downscaler_data_size()
* - isvce_get_downscaler_padding_dims()
* - isvce_get_downscaler_normalized_filtered_pixel()
* - isvce_horizontal_downscale_and_transpose()
* - isvce_process_downscaler()
* - isvce_downscaler_function_selector()
* - isvce_initialize_downscaler()
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* system include files */
#include <stdio.h>
#include <stdlib.h>
#include "ih264_typedefs.h"
#include "ih264_macros.h"
#include "isvc_macros.h"
#include "ih264_platform_macros.h"
#include "iv2.h"
#include "isvc_defs.h"
#include "isvce_defs.h"
#include "isvc_structs.h"
#include "isvc_structs.h"
#include "isvce_downscaler.h"
#include "isvce_downscaler_private_defs.h"
/**
******************************************************************************
* @brief lanczos filter coefficients for 2x downscaling
* @remarks One row per filter phase (NUM_SCALER_FILTER_PHASES rows). Though
* the length of the filter is 8 (NUM_SCALER_FILTER_TAPS), the same
* coefficients are written twice in every row so that 2 rows can be
* processed at one go in SIMD. The 8 taps of every row sum to 128,
* i.e. 1 << FILTER_COEFF_Q.
******************************************************************************
*/
static WORD8 gai1_lanczos_coefficients_2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] = {
{-7, 0, 39, 64, 39, 0, -7, 0, -7, 0, 39, 64, 39, 0, -7, 0},
{-6, 0, 33, 62, 41, 4, -6, 0, -6, 0, 33, 62, 41, 4, -6, 0},
{-5, -1, 29, 57, 45, 9, -5, -1, -5, -1, 29, 57, 45, 9, -5, -1},
{-4, -2, 23, 55, 48, 14, -4, -2, -4, -2, 23, 55, 48, 14, -4, -2},
{-3, -3, 18, 52, 52, 18, -3, -3, -3, -3, 18, 52, 52, 18, -3, -3},
{-2, -4, 13, 49, 54, 24, -2, -4, -2, -4, 13, 49, 54, 24, -2, -4},
{-1, -5, 9, 44, 58, 29, -1, -5, -1, -5, 9, 44, 58, 29, -1, -5},
{0, -6, 3, 42, 61, 34, 0, -6, 0, -6, 3, 42, 61, 34, 0, -6}};
/**
******************************************************************************
* @brief lanczos filter coefficients for 1.5x downscaling
* @remarks One row per filter phase (NUM_SCALER_FILTER_PHASES rows). Though
* the length of the filter is 8 (NUM_SCALER_FILTER_TAPS), the same
* coefficients are replicated so that 2 rows can be processed at one
* go in SIMD.
* NOTE(review): rows 0 and 4 sum to 128 (1 << FILTER_COEFF_Q), but
* rows 1, 2, 6 and 7 sum to 129 and rows 3 and 5 sum to 127 - confirm
* that this slight gain mismatch between phases is intended.
******************************************************************************
*/
static WORD8 gai1_lanczos_coefficients_3by2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] =
{{0, -11, 32, 86, 32, -11, 0, 0, 0, -11, 32, 86, 32, -11, 0, 0},
{0, -10, 26, 79, 39, -5, 0, 0, 0, -10, 26, 79, 39, -5, 0, 0},
{0, -8, 21, 72, 46, 0, -2, 0, 0, -8, 21, 72, 46, 0, -2, 0},
{0, -6, 15, 66, 52, 3, -3, 0, 0, -6, 15, 66, 52, 3, -3, 0},
{0, -6, 10, 60, 60, 10, -6, 0, 0, -6, 10, 60, 60, 10, -6, 0},
{0, -3, 3, 52, 66, 15, -6, 0, 0, -3, 3, 52, 66, 15, -6, 0},
{0, -2, 0, 46, 72, 21, -8, 0, 0, -2, 0, 46, 72, 21, -8, 0},
{0, 0, -5, 39, 79, 26, -10, 0, 0, 0, -5, 39, 79, 26, -10, 0}};
/**
 *******************************************************************************
 *
 * @brief
 *  computes the number of bytes the downscaler asks for
 *
 * @par Description:
 *  With a single spatial layer nothing is requested. Otherwise the size is
 *  sizeof(downscaler_state_t) plus
 *  (u4_height + 2 * NUM_SCALER_FILTER_TAPS) * (UWORD32) (u4_width / d_scaling_factor)
 *  bytes. isvce_initialize_downscaler() places the state struct at the start
 *  of its memory record and uses what follows as the scratch buffer, so the
 *  second term is presumably the scratch buffer size (TODO confirm the caller
 *  feeds this value into that memory record).
 *
 * @param[in] u1_num_spatial_layers
 *  number of spatial layers; values of 0 and 1 yield a size of 0
 *
 * @param[in] d_scaling_factor
 *  divisor applied to u4_width
 *
 * @param[in] u4_width
 *  width used in the size computation
 *
 * @param[in] u4_height
 *  height used in the size computation
 *
 * @returns
 *  size in bytes, or 0 when u1_num_spatial_layers is not greater than 1
 *
 *******************************************************************************
 */
UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor,
                                       UWORD32 u4_width, UWORD32 u4_height)
{
    UWORD32 u4_state_bytes;
    UWORD32 u4_scaled_width;
    UWORD32 u4_padded_height;

    /* Nothing to allocate unless there is more than one spatial layer */
    if(u1_num_spatial_layers <= 1)
    {
        return 0;
    }

    u4_state_bytes = sizeof(downscaler_state_t);
    u4_padded_height = u4_height + NUM_SCALER_FILTER_TAPS * 2;
    u4_scaled_width = (UWORD32) (u4_width / d_scaling_factor);

    return u4_state_bytes + (u4_padded_height * u4_scaled_width);
}
/**
 *******************************************************************************
 *
 * @brief
 *  reports the padding the downscaler filter needs around its input
 *
 * @par Description:
 *  Top and bottom padding are NUM_SCALER_FILTER_TAPS / 2. Left and right
 *  padding are the same quantity passed through ALIGN8().
 *
 * @param[out] ps_pad_dims
 *  struct whose four padding fields are overwritten
 *
 * @returns
 *  none
 *
 *******************************************************************************
 */
void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims)
{
    const UWORD8 u1_vert_pad = NUM_SCALER_FILTER_TAPS / 2;
    const UWORD8 u1_horz_pad = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);

    ps_pad_dims->u1_top_pad_size = u1_vert_pad;
    ps_pad_dims->u1_bottom_pad_size = u1_vert_pad;

    ps_pad_dims->u1_left_pad_size = u1_horz_pad;
    ps_pad_dims->u1_right_pad_size = u1_horz_pad;
}
/**
 *******************************************************************************
 *
 * @brief
 *  downscales one block of a 420SP (interleaved UV) picture
 *
 * @par Description:
 *  Each plane is handled with two calls to the kernel stored in
 *  pf_downscaler. The first call reads the source plane and writes into the
 *  scratch buffer; the second call reads the scratch buffer and writes into
 *  the destination plane. The reference C kernel in this file scales along
 *  rows and stores its output transposed, so the second call performs the
 *  vertical scaling. The source pointer is moved back by
 *  NUM_SCALER_FILTER_TAPS / 2 rows, and by NUM_SCALER_FILTER_TAPS / 2 bytes
 *  (luma) or NUM_SCALER_FILTER_TAPS bytes (chroma) within the row, before the
 *  first call, so the caller must provide that much valid data around the
 *  block.
 *
 * @param[in] ps_scaler
 *  pointer to downscaler context
 *
 * @param[in] ps_src_buf_props
 *  pointer to source buffer props struct; its colour format is asserted to be
 *  IV_YUV_420SP_UV
 *
 * @param[out] ps_dst_buf_props
 *  pointer to destination buffer props struct that receives the scaled planes
 *
 * @param[in] u4_blk_wd
 *  width of the block to be processed
 *
 * @param[in] u4_blk_ht
 *  height of the block to be processed
 *
 * @returns
 *  none
 *
 * @remarks
 *  NOTE(review): the first chroma call is given the same output width as luma
 *  (u4_blk_wd / d_scaling_factor) while the reference C kernel treats that
 *  count as a number of UV pairs - confirm this is intended.
 *
 *******************************************************************************
 */
void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler, yuv_buf_props_t *ps_src_buf_props,
                              yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd,
                              UWORD32 u4_blk_ht)
{
    buffer_container_t s_in_buf;
    buffer_container_t s_out_buf;

    UWORD32 u4_out_wd, u4_out_ht;
    UWORD32 u4_padded_rows;

    downscaler_state_t *ps_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;

    ASSERT(ps_src_buf_props->e_color_format == IV_YUV_420SP_UV);

    /* Dimensions of the luma block after scaling */
    u4_out_wd = (UWORD32) (u4_blk_wd / ps_scaler->d_scaling_factor);
    u4_out_ht = (UWORD32) (u4_blk_ht / ps_scaler->d_scaling_factor);

    /* ------------------------------ luma ------------------------------ */
    /* Input rows fed to the first pass, filter support included */
    u4_padded_rows = u4_blk_ht + NUM_SCALER_FILTER_TAPS;

    s_in_buf = ps_src_buf_props->as_component_bufs[Y];
    s_in_buf.pv_data = ((UWORD8 *) s_in_buf.pv_data) - (NUM_SCALER_FILTER_TAPS / 2) -
                       (NUM_SCALER_FILTER_TAPS / 2) * s_in_buf.i4_data_stride;

    s_out_buf.pv_data = ps_state->pv_scratch_buf;
    s_out_buf.i4_data_stride = u4_padded_rows;

    /* Pass 1: source -> scratch */
    ps_state->pf_downscaler(ps_scaler, &s_in_buf, &s_out_buf, ps_state->pai1_filters, u4_out_wd,
                            u4_padded_rows, 0);

    /* Pass 2: scratch -> destination */
    s_in_buf = s_out_buf;
    s_out_buf = ps_dst_buf_props->as_component_bufs[Y];

    ps_state->pf_downscaler(ps_scaler, &s_in_buf, &s_out_buf, ps_state->pai1_filters, u4_out_ht,
                            u4_out_wd, 0);

    /* ----------------------------- chroma ----------------------------- */
    /* Chroma uses half the luma height */
    u4_padded_rows = (u4_blk_ht / 2) + NUM_SCALER_FILTER_TAPS;

    s_in_buf = ps_src_buf_props->as_component_bufs[U];
    s_in_buf.pv_data = ((UWORD8 *) s_in_buf.pv_data) - NUM_SCALER_FILTER_TAPS -
                       (NUM_SCALER_FILTER_TAPS / 2) * s_in_buf.i4_data_stride;

    s_out_buf.pv_data = ps_state->pv_scratch_buf;
    s_out_buf.i4_data_stride = u4_padded_rows;

    /* Pass 1: interleaved source -> scratch, flagged as chroma */
    ps_state->pf_downscaler(ps_scaler, &s_in_buf, &s_out_buf, ps_state->pai1_filters, u4_out_wd,
                            u4_padded_rows, 1);

    /* Pass 2: scratch -> destination, flagged as non-chroma */
    s_in_buf = s_out_buf;
    s_out_buf = ps_dst_buf_props->as_component_bufs[U];

    ps_state->pf_downscaler(ps_scaler, &s_in_buf, &s_out_buf, ps_state->pai1_filters,
                            u4_out_ht / 2, u4_out_wd, 0);
}
/**
 *******************************************************************************
 *
 * @brief
 *  applies one downscaler filter to NUM_SCALER_FILTER_TAPS source samples
 *
 * @par Description:
 *  Computes the dot product of the first NUM_SCALER_FILTER_TAPS entries of
 *  pu1_src and pi1_filter, adds the rounding offset 1 << (FILTER_COEFF_Q - 1),
 *  divides by 1 << FILTER_COEFF_Q and clips the result with CLIP_U8.
 *
 * @param[in] pu1_src
 *  pointer to the first of NUM_SCALER_FILTER_TAPS consecutive source samples
 *
 * @param[in] pi1_filter
 *  pointer to filter coefficients
 *
 * @returns
 *  the filtered, normalized and clipped sample
 *
 *******************************************************************************
 */
static UWORD8 isvce_get_downscaler_normalized_filtered_pixel(UWORD8 *pu1_src, WORD8 *pi1_filter)
{
    WORD32 i4_tap;
    WORD32 i4_result;

    /* Start the accumulator at the rounding offset */
    WORD32 i4_acc = 1 << (FILTER_COEFF_Q - 1);

    for(i4_tap = 0; i4_tap < NUM_SCALER_FILTER_TAPS; i4_tap++)
    {
        i4_acc += (pu1_src[i4_tap] * pi1_filter[i4_tap]);
    }

    /* Division (not a shift) so that negative sums truncate towards zero */
    i4_result = i4_acc / (1 << FILTER_COEFF_Q);

    return (UWORD8) CLIP_U8(i4_result);
}
/**
 *******************************************************************************
 *
 * @brief
 *  reference C kernel: scales a block along its rows and stores it transposed
 *
 * @par Description:
 *  For every input row, u4_blk_wd output samples are produced. The source
 *  position of each output sample is tracked in fixed point with DOWNSCALER_Q
 *  fractional bits; it starts at i4_init_offset and advances by
 *  u4_horz_increment. The integer part selects the input sample and the
 *  fractional part selects the filter phase through get_filter_phase(). The
 *  sample from input row j and output column i is written to row i, column j
 *  of the destination. In chroma mode the input is read as interleaved pairs;
 *  the two components are filtered separately and stored in two consecutive
 *  destination rows.
 *
 * @param[in] ps_scaler
 *  pointer to downscaler context
 *
 * @param[in] ps_src
 *  pointer to source buffer container
 *
 * @param[out] ps_dst
 *  pointer to destination buffer container
 *
 * @param[in] pai1_filters
 *  pointer to array of downscaler filters, indexed by phase
 *
 * @param[in] u4_blk_wd
 *  width of the block after horizontal scaling (output block width)
 *
 * @param[in] u4_blk_ht
 *  height of the current block (input block height)
 *
 * @param[in] u1_is_chroma
 *  flag suggesting whether the buffer is luma or chroma
 *
 * @returns
 *  none
 *
 * @remarks
 *  The same function is used for vertical scaling too, as the horizontally
 *  scaled input is stored in transposed fashion.
 *
 *******************************************************************************
 */
static void isvce_horizontal_downscale_and_transpose(
    downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst,
    FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma)
{
    WORD32 i4_row, i4_col, i4_tap;

    /* De-interleaved taps of the two chroma components */
    UWORD8 au1_u_taps[NUM_SCALER_FILTER_TAPS];
    UWORD8 au1_v_taps[NUM_SCALER_FILTER_TAPS];

    downscaler_state_t *ps_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;

    UWORD32 u4_first_pos = ps_state->i4_init_offset;
    UWORD32 u4_pos_step = ps_state->u4_horz_increment;

    UWORD32 u4_src_stride = ps_src->i4_data_stride;
    UWORD32 u4_dst_stride = ps_dst->i4_data_stride;

    UWORD8 *pu1_dst_base = ps_dst->pv_data;
    UWORD8 *pu1_src_base = ps_src->pv_data;

    /* Offset the input so that the input pixel to be processed
       coincides with the centre of filter (4th coefficient) */
    pu1_src_base += (1 + u1_is_chroma);

    ASSERT((1 << DOWNSCALER_Q) == ps_state->u4_vert_increment);

    for(i4_row = 0; i4_row < (WORD32) u4_blk_ht; i4_row++)
    {
        UWORD8 *pu1_src_row = pu1_src_base + (i4_row * u4_src_stride);
        /* Transposed store: input row index becomes the output column */
        UWORD8 *pu1_dst_col = pu1_dst_base + i4_row;
        UWORD32 u4_pos = u4_first_pos;

        for(i4_col = 0; i4_col < (WORD32) u4_blk_wd; i4_col++)
        {
            UWORD8 u1_phase = get_filter_phase(u4_pos);
            WORD8 *pi1_coeffs = pai1_filters[u1_phase];
            UWORD16 u2_int_pos = u4_pos >> DOWNSCALER_Q;

            UWORD8 *pu1_in = pu1_src_row + (u2_int_pos << u1_is_chroma);
            UWORD8 *pu1_out = pu1_dst_col + ((i4_col << u1_is_chroma) * u4_dst_stride);

            if(u1_is_chroma)
            {
                UWORD8 u1_u_out, u1_v_out;

                /* Split the interleaved samples into one run per component */
                for(i4_tap = 0; i4_tap < NUM_SCALER_FILTER_TAPS; i4_tap++)
                {
                    au1_u_taps[i4_tap] = pu1_in[2 * i4_tap];
                    au1_v_taps[i4_tap] = pu1_in[(2 * i4_tap) + 1];
                }

                u1_u_out = isvce_get_downscaler_normalized_filtered_pixel(au1_u_taps, pi1_coeffs);
                u1_v_out = isvce_get_downscaler_normalized_filtered_pixel(au1_v_taps, pi1_coeffs);

                /* The two components land in consecutive destination rows */
                pu1_out[0] = u1_u_out;
                pu1_out[u4_dst_stride] = u1_v_out;
            }
            else
            {
                *pu1_out = isvce_get_downscaler_normalized_filtered_pixel(pu1_in, pi1_coeffs);
            }

            /* Advance to the source position of the next output sample */
            u4_pos += u4_pos_step;
        }
    }
}
/**
 *******************************************************************************
 *
 * @brief
 *  selects the downscaler kernel for the given architecture
 *
 * @par Description:
 *  Stores in ps_scaler_state->pf_downscaler the SSE4.2 kernel (X86 builds),
 *  the NEON kernel (ARMV8 builds, or ARM builds without DISABLE_NEON) or, for
 *  any architecture value not handled by the compiled-in cases, the C kernel
 *  isvce_horizontal_downscale_and_transpose().
 *
 * @param[out] ps_scaler_state
 *  downscaler state whose pf_downscaler member is set
 *
 * @param[in] e_arch
 *  architecture type
 *
 * @returns
 *  none
 *
 * @remarks
 *  The 32-bit NEON cases are compiled only when ARM is defined. Testing
 *  !defined(DISABLE_NEON) alone would also be true on targets that are
 *  neither X86 nor ARM and would reference the NEON kernel there.
 *
 *******************************************************************************
 */
void isvce_downscaler_function_selector(downscaler_state_t *ps_scaler_state, IV_ARCH_T e_arch)
{
    switch(e_arch)
    {
#if defined(X86)
        case ARCH_X86_SSE42:
        {
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_sse42;

            break;
        }
#elif defined(ARMV8)
        case ARCH_ARM_A53:
        case ARCH_ARM_A57:
        case ARCH_ARM_V8_NEON:
        {
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon;

            break;
        }
#elif defined(ARM) && !defined(DISABLE_NEON)
        case ARCH_ARM_A9Q:
        case ARCH_ARM_A9A:
        case ARCH_ARM_A9:
        case ARCH_ARM_A7:
        case ARCH_ARM_A5:
        case ARCH_ARM_A15:
        {
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon;

            break;
        }
#endif
        default:
        {
            /* Portable C fallback */
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose;

            break;
        }
    }
}
/**
 *******************************************************************************
 *
 * @brief
 *  initializes the downscaler context
 *
 * @par Description:
 *  Does nothing unless u1_num_spatial_layers is greater than 1. Otherwise the
 *  state struct is placed at the start of the memory record, the bytes right
 *  after it become the scratch buffer, and the context and state fields are
 *  filled in: input dimensions, scaling factor, layer count, kernel function,
 *  fixed-point increments, initial offset and filter table.
 *
 * @param[out] ps_scaler
 *  pointer to downscaler context
 *
 * @param[in] ps_mem_rec
 *  pointer to memory allocated to downscaler process
 *
 * @param[in] d_scaling_factor
 *  scaling ratio of width / height between two consecutive SVC layers. A value
 *  of exactly 2.0 selects the 2x filter table; any other value selects the
 *  1.5x table.
 *
 * @param[in] u1_num_spatial_layers
 *  number of spatial layers
 *
 * @param[in] u4_in_width
 *  width of the input
 *
 * @param[in] u4_in_height
 *  height of the input
 *
 * @param[in] e_arch
 *  architecture type
 *
 * @returns
 *  none
 *
 *******************************************************************************
 */
void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec,
                                 DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers,
                                 UWORD32 u4_in_width, UWORD32 u4_in_height, IV_ARCH_T e_arch)
{
    UWORD8 *pu1_mem;
    downscaler_state_t *ps_state;

    /* A single layer needs no downscaler */
    if(u1_num_spatial_layers <= 1)
    {
        return;
    }

    /* Memory layout: [downscaler_state_t][scratch buffer] */
    pu1_mem = (UWORD8 *) ps_mem_rec->pv_base;
    ps_state = (downscaler_state_t *) pu1_mem;
    ps_state->pv_scratch_buf = pu1_mem + sizeof(ps_state[0]);

    ps_state->u4_in_wd = u4_in_width;
    ps_state->u4_in_ht = u4_in_height;

    ps_scaler->pv_scaler_state = ps_state;
    ps_scaler->d_scaling_factor = d_scaling_factor;
    ps_scaler->u1_num_spatial_layers = u1_num_spatial_layers;

    isvce_downscaler_function_selector(ps_state, e_arch);

    /* Source step per output sample, with DOWNSCALER_Q fractional bits */
    ps_state->u4_horz_increment = (UWORD32) (d_scaling_factor * (1 << DOWNSCALER_Q));
    ps_state->u4_vert_increment = (1 << DOWNSCALER_Q);
    ps_state->i4_init_offset = 0;

    if(d_scaling_factor == 2.0)
    {
        ps_state->pai1_filters = gai1_lanczos_coefficients_2x;
    }
    else
    {
        ps_state->pai1_filters = gai1_lanczos_coefficients_3by2x;
    }
}

View file

@ -0,0 +1,205 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_downscaler.h
*
* @brief
* Contains downscaler functions required by the SVC encoder
*
* @author
* ittiam
*
* @par List of Functions:
* - isvce_get_downscaler_data_size()
* - isvce_get_downscaler_padding_dims()
* - isvce_get_downscaler_normalized_filtered_pixel()
* - isvce_horizontal_downscale_and_transpose()
* - isvce_process_downscaler()
* - isvce_initialize_downscaler()
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_DOWNSCALER_H_
#define _ISVCE_DOWNSCALER_H_
#include "ih264_typedefs.h"
#include "iv2.h"
#include "isvc_defs.h"
#include "isvc_structs.h"
#include "isvce_defs.h"
/* Public downscaler context; filled in by isvce_initialize_downscaler() */
typedef struct
{
/**
* pointer to the state of downscaler; the downscaler implementation casts
* it to downscaler_state_t
*/
void *pv_scaler_state;
/**
* scaling factor between the dimensions of two consecutive SVC layers
*/
DOUBLE d_scaling_factor;
/**
* Num spatial layers
*/
UWORD8 u1_num_spatial_layers;
} downscaler_ctxt_t;
/* Padding sizes reported by isvce_get_downscaler_padding_dims() */
typedef struct
{
/* padding on the left side */
UWORD8 u1_left_pad_size;
/* padding on the right side */
UWORD8 u1_right_pad_size;
/* padding at the top */
UWORD8 u1_top_pad_size;
/* padding at the bottom */
UWORD8 u1_bottom_pad_size;
} padding_dims_t;
/**
*******************************************************************************
*
* @brief
* initializes the downscaler context
*
* @par Description:
* initializes the downscaler context and state for the given scaling factor
* (kernel function, filter table, increments, scratch buffer). Nothing is
* done unless u1_num_spatial_layers is greater than 1.
*
* @param[out] ps_scaler
* pointer to downscaler context
*
* @param[in] ps_mem_rec
* pointer to memory allocated to downscaler process
*
* @param[in] d_scaling_factor
* scaling ratio of width / height between two consecutive SVC layers
*
* @param[in] u1_num_spatial_layers
* number of spatial layers
*
* @param[in] u4_in_width
* width of the input
*
* @param[in] u4_in_height
* height of the input
*
* @param[in] e_arch
* architecture type
*
* @returns
* none
*
* @remarks
* none
*
*******************************************************************************
*/
extern void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec,
DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers,
UWORD32 u4_in_width, UWORD32 u4_in_height,
IV_ARCH_T e_arch);
/**
*******************************************************************************
*
* @brief
* gets the memory size required for downscaler
*
* @par Description:
* returns the number of bytes the downscaler needs: the state struct plus
* (u4_height + 2 * NUM_SCALER_FILTER_TAPS) * (u4_width / d_scaling_factor)
* bytes.
*
* @param[in] u1_num_spatial_layers
* number of spatial layers; the result is 0 unless this is greater than 1
*
* @param[in] d_scaling_factor
* divisor applied to u4_width
*
* @param[in] u4_width
* width used in the size computation
*
* @param[in] u4_height
* height used in the size computation
*
* @returns
* size in bytes
*
*******************************************************************************
*/
extern UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor,
UWORD32 u4_width, UWORD32 u4_height);
/**
*******************************************************************************
*
* @brief
* processes downscaler
*
* @par Description:
* scales the luma and chroma planes of a block, each with two calls to the
* selected downscaler kernel (source to scratch buffer, then scratch buffer
* to destination)
*
* @param[in] ps_scaler
* pointer to downscaler context
*
* @param[in] ps_src_buf_props
* pointer to source buffer props struct; the implementation asserts that its
* colour format is IV_YUV_420SP_UV
*
* @param[out] ps_dst_buf_props
* pointer to destination buffer props struct
*
* @param[in] u4_blk_wd
* width of the block to be processed
*
* @param[in] u4_blk_ht
* height of the block to be processed
*
* @returns
* none
*
*******************************************************************************
*/
extern void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler,
yuv_buf_props_t *ps_src_buf_props,
yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd,
UWORD32 u4_blk_ht);
/**
*******************************************************************************
*
* @brief
* gets the padding size required for filtering
*
* @par Description:
* fills in the left, right, top and bottom padding sizes required by the
* downscaler filter
*
* @param[out] ps_pad_dims
* struct that receives the four padding sizes
*
* @returns
* none
*
*******************************************************************************
*/
extern void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims);
#endif

View file

@ -0,0 +1,124 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef _ISVCE_DOWNSCALER_PRIVATE_DEFS_H_
#define _ISVCE_DOWNSCALER_PRIVATE_DEFS_H_
#include "ih264_typedefs.h"
#include "isvc_macros.h"
#include "ih264_debug.h"
#include "isvc_structs.h"
#include "isvce_downscaler.h"
/* Macros */
/* Number of fractional bits in the fixed-point source positions and increments */
#define DOWNSCALER_Q 16
/* Filtered sums are normalized by 1 << FILTER_COEFF_Q */
#define FILTER_COEFF_Q 7
/* Number of taps in one filter */
#define NUM_SCALER_FILTER_TAPS 8
/* Number of filter phases; get_filter_phase() asserts that this is 8 */
#define NUM_SCALER_FILTER_PHASES 8
/* Typedefs */
/* Pointer to rows of 2 * NUM_SCALER_FILTER_TAPS coefficients, one row per phase */
typedef WORD8 (*FILTER_COEFF_ARRAY)[NUM_SCALER_FILTER_TAPS * 2];
/* Signature of a downscale-and-transpose kernel. Note that the first */
/* parameter is the downscaler context, despite being named ps_scaler_state. */
typedef void FT_DOWNSCALER(downscaler_ctxt_t *ps_scaler_state, buffer_container_t *ps_src,
buffer_container_t *ps_dst, FILTER_COEFF_ARRAY pai1_filters,
UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma);
/* Structs */
/* Private downscaler state; reached through downscaler_ctxt_t::pv_scaler_state */
typedef struct
{
/**
* pointer to scratch buf; holds the output of the first scaling pass
*/
void *pv_scratch_buf;
/**
* initial offset while calculating input pixel location
* (DOWNSCALER_Q fractional bits)
*/
WORD32 i4_init_offset;
/**
* increment to the centre pixel in horizontal direction
* (DOWNSCALER_Q fractional bits)
*/
UWORD32 u4_horz_increment;
/**
* increment to the centre pixel in vertical direction
* (DOWNSCALER_Q fractional bits)
*/
UWORD32 u4_vert_increment;
/**
* pointer to the filter coefficients, one row per phase
*/
FILTER_COEFF_ARRAY pai1_filters;
/**
* function pointer to the leaf level function for horizontal scaling
*/
FT_DOWNSCALER *pf_downscaler;
/**
* width of the input (highest SVC layer)
*/
UWORD32 u4_in_wd;
/**
* height of the input (highest SVC layer)
*/
UWORD32 u4_in_ht;
} downscaler_state_t;
/**
 *******************************************************************************
 *
 * @brief
 *  maps a fixed-point source position to a filter phase index
 *
 * @par Description:
 *  The fractional part of the position (its value modulo 1 << DOWNSCALER_Q)
 *  is split into NUM_SCALER_FILTER_PHASES equal bins and the index of the bin
 *  containing it is returned. The index is computed directly instead of
 *  searching a table of bin boundaries; for the asserted phase count of 8 the
 *  boundaries are exact multiples of the bin width, so both give the same
 *  result.
 *
 * @param[in] u4_center_pixel_pos
 *  source position with DOWNSCALER_Q fractional bits
 *
 * @returns
 *  phase index in the range [0, NUM_SCALER_FILTER_PHASES - 1]
 *
 *******************************************************************************
 */
static FORCEINLINE UWORD32 get_filter_phase(UWORD32 u4_center_pixel_pos)
{
    /* Width of one phase bin, in units of the fractional position */
    const UWORD32 u4_phase_bin_width = (1 << DOWNSCALER_Q) / NUM_SCALER_FILTER_PHASES;

    ASSERT(NUM_SCALER_FILTER_PHASES == 8);

    /* Only the fractional part of the position selects the phase */
    u4_center_pixel_pos = u4_center_pixel_pos % (1 << DOWNSCALER_Q);

    return u4_center_pixel_pos / u4_phase_bin_width;
}
/* SSE42 Declarations */
/* Selected by isvce_downscaler_function_selector() in X86 builds */
extern FT_DOWNSCALER isvce_horizontal_downscale_and_transpose_sse42;
/* NEON Declarations */
/* Selected by isvce_downscaler_function_selector() in ARM / ARMV8 builds */
extern FT_DOWNSCALER isvce_horizontal_downscale_and_transpose_neon;
#endif

790
encoder/svc/isvce_encode.c Normal file
View file

@ -0,0 +1,790 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file
* isvce_encode.c
*
* @brief
* This file contains functions for encoding the input yuv frame in synchronous
* api mode
*
* @author
* ittiam
*
* List of Functions
* - isvce_join_threads()
* - isvce_wait_for_thread()
* - isvce_encode()
*
******************************************************************************
*/
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <assert.h>
#include <math.h>
#include <stdbool.h>
#include "ih264_typedefs.h"
/* Dependencies of ih264_buf_mgr.h */
/* Dependencies of ih264_list.h */
#include "ih264_error.h"
/* Dependencies of ih264_common_tables.h */
#include "ih264_defs.h"
#include "ih264_structs.h"
#include "ih264_buf_mgr.h"
#include "ih264_common_tables.h"
#include "ih264_list.h"
#include "ih264_platform_macros.h"
#include "ih264_trans_data.h"
#include "ih264_size_defs.h"
/* Dependencies of ih264e_cabac_structs.h */
#include "ih264_cabac_tables.h"
/* Dependencies of ime_structs.h */
#include "ime_defs.h"
#include "ime_distortion_metrics.h"
/* Dependencies of ih264e_structs.h */
#include "iv2.h"
#include "ive2.h"
#include "ih264_defs.h"
#include "ih264_deblk_edge_filters.h"
#include "ih264_inter_pred_filters.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
/* Dependencies of ih264e_bitstream.h */
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ih264e_cabac_structs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ime_statistics.h"
#include "ime_structs.h"
/* Dependencies of 'ih264e_utils.h' */
#include "ih264e_defs.h"
#include "ih264e_structs.h"
#include "ih264e_utils.h"
#include "ime.h"
#include "isvce.h"
#include "isvce_cabac.h"
#include "isvce_deblk.h"
#include "isvce_defs.h"
#include "isvce_downscaler.h"
#include "isvce_encode_header.h"
#include "isvce_fmt_conv.h"
#include "isvce_ibl_eval.h"
#include "isvce_ilp_mv.h"
#include "isvce_intra_modes_eval.h"
#include "isvce_me.h"
#include "isvce_process.h"
#include "isvce_rate_control.h"
#include "isvce_residual_pred.h"
#include "isvce_sub_pic_rc.h"
#include "isvce_utils.h"
/* When non-zero, isvce_encode() compares the active SEI params (MDCV, CLL, */
/* AVE) with the configured ones and forces the current frame to IDR when   */
/* any of them differ                                                       */
#define SEI_BASED_FORCE_IDR 1
/*****************************************************************************/
/* Function Definitions */
/*****************************************************************************/
/**
******************************************************************************
*
* @brief Yields the processor and then puts the calling thread to sleep
*
* @par Description
* Two steps are performed back to back:
*  - ithread_yield() : give up the remainder of the current timeslice so that
*    another ready thread may run
*  - ithread_sleep() : block the calling thread for the requested duration,
*    whether or not another thread wants to run
*
* NOTE(review): the argument is named sleep_us, but it is forwarded unchanged
* to ithread_sleep() and not to a microsecond specific call. Confirm the unit
* that ithread_sleep() expects.
*
* @param[in] sleep_us
* thread sleep duration, passed as is to ithread_sleep()
*
* @returns IH264E_SUCCESS, unconditionally
*
******************************************************************************
*/
IH264E_ERROR_T isvce_wait_for_thread(UWORD32 sleep_us)
{
    const IH264E_ERROR_T e_status = IH264E_SUCCESS;

    /* let any other ready thread run first */
    ithread_yield();

    /* then block for the requested duration */
    ithread_sleep(sleep_us);

    return e_status;
}
/**
******************************************************************************
*
* @brief
* Encodes in synchronous api mode
*
* @par Description
* This routine processes input yuv, encodes it and outputs bitstream and recon.
* In order, it:
*  - resets the output structure and splits the caller's bitstream buffer into
*    equal slices, one per spatial layer
*  - applies queued configuration updates
*  - in header mode, generates SPS/PPS and returns
*  - otherwise queues the input and, unless the frame is skipped, encodes each
*    spatial layer
*  - optionally returns a recon picture, releases buffers of a post encode
*    skipped picture, and packs the per layer bitstreams back to back
*
* @param[in] ps_codec_obj
* Pointer to codec object at API level
*
* @param[in] pv_api_ip
* Pointer to input argument structure (accessed as isvce_video_encode_ip_t)
*
* @param[out] pv_api_op
* Pointer to output argument structure (accessed as isvce_video_encode_op_t)
*
* @returns IV_SUCCESS on the paths that reach a return statement in this
* function. Errors are handed to SET_ERROR_ON_RETURN together with
* u4_error_code and IV_FAIL (NOTE(review): the macro is defined elsewhere;
* presumably it stores the error and returns IV_FAIL - confirm)
*
******************************************************************************
*/
WORD32 isvce_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
{
/* error status */
IH264E_ERROR_T error_status = IH264E_SUCCESS;
/* codec ctxt */
isvce_codec_t *ps_codec = (isvce_codec_t *) ps_codec_obj->pv_codec_handle;
/* input frame to encode */
isvce_video_encode_ip_t *ps_video_encode_ip = pv_api_ip;
/* output buffer to write stream */
isvce_video_encode_op_t *ps_video_encode_op = pv_api_op;
/* i/o structures */
isvce_inp_buf_t s_inp_buf;
isvce_out_buf_t s_out_buf;
/* context set selector, rate control pre encode skip decision */
WORD32 ctxt_sel = 0, i4_rc_pre_enc_skip;
WORD32 i, j;
/* this function is written for exactly one context set */
ASSERT(MAX_CTXT_SETS == 1);
/********************************************************************/
/* BEGIN INIT */
/********************************************************************/
/* reset output structure */
ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
ps_video_encode_op->s_ive_op.output_present = 0;
ps_video_encode_op->s_ive_op.dump_recon = 0;
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
/* Check for output memory allocation size and give every spatial layer an */
/* equal sized slice of the caller's bitstream buffer */
{
UWORD32 u4_min_bufsize =
MIN_STREAM_SIZE * ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;
UWORD32 u4_bufsize_per_layer = ps_video_encode_ip->s_ive_ip.s_out_buf.u4_bufsize /
ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;
if(ps_video_encode_ip->s_ive_ip.s_out_buf.u4_bufsize < u4_min_bufsize)
{
error_status = IH264E_INSUFFICIENT_OUTPUT_BUFFER;
SET_ERROR_ON_RETURN(error_status, IVE_UNSUPPORTEDPARAM,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
}
/* layer i writes at offset i * u4_bufsize_per_layer of the caller's buffer */
for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
{
s_out_buf.as_bits_buf[i] = ps_video_encode_ip->s_ive_ip.s_out_buf;
s_out_buf.as_bits_buf[i].u4_bufsize = u4_bufsize_per_layer;
s_out_buf.as_bits_buf[i].pv_buf =
((UWORD8 *) ps_video_encode_ip->s_ive_ip.s_out_buf.pv_buf) +
u4_bufsize_per_layer * i;
}
}
s_out_buf.u4_is_last = 0;
s_out_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
s_out_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
/* api call cnt */
ps_codec->i4_encode_api_call_cnt += 1;
/* codec context selector */
ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
/* reset status flags */
ps_codec->ai4_pic_cnt[ctxt_sel] = -1;
ps_codec->s_rate_control.post_encode_skip[ctxt_sel] = 0;
ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = 0;
/* pass output buffer to codec */
ps_codec->as_out_buf[ctxt_sel] = s_out_buf;
/* initialize codec ctxt with default params for the first encode api call */
if(ps_codec->i4_encode_api_call_cnt == 0)
{
isvce_codec_init(ps_codec);
}
/* parse configuration params: a valid entry is applied when its timestamp */
/* matches the current input, or when either timestamp word is -1; an */
/* applied entry is then marked invalid so that it is consumed only once */
for(i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++)
{
isvce_cfg_params_t *ps_cfg = &ps_codec->as_cfg[i];
if(1 == ps_cfg->u4_is_valid)
{
if(((ps_cfg->u4_timestamp_high == ps_video_encode_ip->s_ive_ip.u4_timestamp_high) &&
(ps_cfg->u4_timestamp_low == ps_video_encode_ip->s_ive_ip.u4_timestamp_low)) ||
((WORD32) ps_cfg->u4_timestamp_high == -1) ||
((WORD32) ps_cfg->u4_timestamp_low == -1))
{
error_status = isvce_codec_update_config(ps_codec, ps_cfg);
SET_ERROR_ON_RETURN(error_status, IVE_UNSUPPORTEDPARAM,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
ps_cfg->u4_is_valid = 0;
}
}
}
/* Force IDR based on SEI params: for each of MDCV, CLL and AVE, if the */
/* active copy differs from the configured copy, the active copy is */
/* refreshed and its present flag is set, else the flag is cleared. Any set */
/* flag forces the current frame to IDR */
#if SEI_BASED_FORCE_IDR
{
sei_mdcv_params_t *ps_sei_mdcv_params = &ps_codec->s_sei.s_sei_mdcv_params;
sei_mdcv_params_t *ps_cfg_sei_mdcv_params = &ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
sei_cll_params_t *ps_sei_cll_params = &ps_codec->s_sei.s_sei_cll_params;
sei_cll_params_t *ps_cfg_sei_cll_params = &ps_codec->s_cfg.s_sei.s_sei_cll_params;
sei_ave_params_t *ps_sei_ave_params = &ps_codec->s_sei.s_sei_ave_params;
sei_ave_params_t *ps_cfg_sei_ave_params = &ps_codec->s_cfg.s_sei.s_sei_ave_params;
if((ps_sei_mdcv_params->au2_display_primaries_x[0] !=
ps_cfg_sei_mdcv_params->au2_display_primaries_x[0]) ||
(ps_sei_mdcv_params->au2_display_primaries_x[1] !=
ps_cfg_sei_mdcv_params->au2_display_primaries_x[1]) ||
(ps_sei_mdcv_params->au2_display_primaries_x[2] !=
ps_cfg_sei_mdcv_params->au2_display_primaries_x[2]) ||
(ps_sei_mdcv_params->au2_display_primaries_y[0] !=
ps_cfg_sei_mdcv_params->au2_display_primaries_y[0]) ||
(ps_sei_mdcv_params->au2_display_primaries_y[1] !=
ps_cfg_sei_mdcv_params->au2_display_primaries_y[1]) ||
(ps_sei_mdcv_params->au2_display_primaries_y[2] !=
ps_cfg_sei_mdcv_params->au2_display_primaries_y[2]) ||
(ps_sei_mdcv_params->u2_white_point_x != ps_cfg_sei_mdcv_params->u2_white_point_x) ||
(ps_sei_mdcv_params->u2_white_point_y != ps_cfg_sei_mdcv_params->u2_white_point_y) ||
(ps_sei_mdcv_params->u4_max_display_mastering_luminance !=
ps_cfg_sei_mdcv_params->u4_max_display_mastering_luminance) ||
(ps_sei_mdcv_params->u4_min_display_mastering_luminance !=
ps_cfg_sei_mdcv_params->u4_min_display_mastering_luminance))
{
ps_codec->s_sei.s_sei_mdcv_params = ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
ps_codec->s_sei.u1_sei_mdcv_params_present_flag = 1;
}
else
{
ps_codec->s_sei.u1_sei_mdcv_params_present_flag = 0;
}
if((ps_sei_cll_params->u2_max_content_light_level !=
ps_cfg_sei_cll_params->u2_max_content_light_level) ||
(ps_sei_cll_params->u2_max_pic_average_light_level !=
ps_cfg_sei_cll_params->u2_max_pic_average_light_level))
{
ps_codec->s_sei.s_sei_cll_params = ps_codec->s_cfg.s_sei.s_sei_cll_params;
ps_codec->s_sei.u1_sei_cll_params_present_flag = 1;
}
else
{
ps_codec->s_sei.u1_sei_cll_params_present_flag = 0;
}
if((ps_sei_ave_params->u4_ambient_illuminance !=
ps_cfg_sei_ave_params->u4_ambient_illuminance) ||
(ps_sei_ave_params->u2_ambient_light_x != ps_cfg_sei_ave_params->u2_ambient_light_x) ||
(ps_sei_ave_params->u2_ambient_light_y != ps_cfg_sei_ave_params->u2_ambient_light_y))
{
ps_codec->s_sei.s_sei_ave_params = ps_codec->s_cfg.s_sei.s_sei_ave_params;
ps_codec->s_sei.u1_sei_ave_params_present_flag = 1;
}
else
{
ps_codec->s_sei.u1_sei_ave_params_present_flag = 0;
}
if((1 == ps_codec->s_sei.u1_sei_mdcv_params_present_flag) ||
(1 == ps_codec->s_sei.u1_sei_cll_params_present_flag) ||
(1 == ps_codec->s_sei.u1_sei_ave_params_present_flag))
{
ps_codec->force_curr_frame_type = IV_IDR_FRAME;
}
}
#endif
/* In case of alt ref and B pics we will have non reference frame in stream */
if(ps_codec->s_cfg.u4_enable_alt_ref || ps_codec->s_cfg.u4_num_bframes)
{
ps_codec->i4_non_ref_frames_in_stream = 1;
}
if(ps_codec->i4_encode_api_call_cnt == 0)
{
/********************************************************************/
/* number of mv/ref bank buffers used by the codec, */
/* 1 to handle curr frame */
/* 1 to store information of ref frame */
/* 1 more additional because of the codec employs 2 ctxt sets */
/* to assist asynchronous API */
/* NOTE(review): the ASSERT at the top of this function requires */
/* MAX_CTXT_SETS == 1, so the '2 ctxt sets' remark looks stale */
/********************************************************************/
/* initialize mv bank buffer manager */
error_status = isvce_svc_au_data_mgr_add_bufs(ps_codec);
SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
/* initialize ref bank buffer manager */
error_status = isvce_svc_au_buf_mgr_add_bufs(ps_codec);
SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
/* for the first frame, generate header when not requested explicitly */
if(ps_codec->i4_header_mode == 0 && ps_codec->u4_header_generated == 0)
{
ps_codec->i4_gen_header = 1;
}
}
/* generate header and return when encoder is operated in header mode */
if(ps_codec->i4_header_mode == 1)
{
/* whenever the header is generated, this implies a start of sequence
* and a sequence needs to be started with IDR
*/
ps_codec->force_curr_frame_type = IV_IDR_FRAME;
/* describe the input using the configured SVC params and dimensions */
s_inp_buf.s_svc_params = ps_codec->s_cfg.s_svc_params;
s_inp_buf.s_inp_props.s_raw_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf;
s_inp_buf.s_inp_props.s_raw_buf.au4_wd[Y] = ps_codec->s_cfg.u4_wd;
s_inp_buf.s_inp_props.s_raw_buf.au4_ht[Y] = ps_codec->s_cfg.u4_ht;
isvce_init_svc_dimension(&s_inp_buf);
/* generate header */
error_status = isvce_generate_sps_pps(ps_codec, &s_inp_buf);
/* send the input to app */
ps_video_encode_op->s_ive_op.s_inp_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf;
ps_video_encode_op->s_ive_op.u4_timestamp_low =
ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
ps_video_encode_op->s_ive_op.u4_timestamp_high =
ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
ps_video_encode_op->s_ive_op.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
/* send the output to app */
ps_video_encode_op->s_ive_op.output_present = 1;
ps_video_encode_op->s_ive_op.dump_recon = 0;
ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].as_bits_buf[0];
/* append the bytes of layers 1..N-1 directly after those of layer 0, so */
/* that the app receives one contiguous bitstream */
for(i = 1; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
{
memmove(((UWORD8 *) ps_video_encode_op->s_ive_op.s_out_buf.pv_buf +
ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes),
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].pv_buf,
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes);
ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes +=
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes;
}
/* error status of the header generation above */
SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
/* indicates that header has been generated previously */
ps_codec->u4_header_generated = 1;
/* api call cnt: undo the increment done on entry for this header only call */
ps_codec->i4_encode_api_call_cnt--;
/* header mode tag is not sticky */
ps_codec->i4_header_mode = 0;
ps_codec->i4_gen_header = 0;
return IV_SUCCESS;
}
/* curr pic cnt */
ps_codec->i4_pic_cnt += 1;
i4_rc_pre_enc_skip = 0;
/* update the input queue once per spatial layer */
/* NOTE(review): the return value is overwritten on every iteration, so only */
/* the value returned for the last layer is used below - confirm intended */
for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
{
i4_rc_pre_enc_skip =
isvce_input_queue_update(ps_codec, &ps_video_encode_ip->s_ive_ip, &s_inp_buf, i);
}
s_out_buf.u4_is_last = s_inp_buf.s_inp_props.u4_is_last;
ps_video_encode_op->s_ive_op.u4_is_last = s_inp_buf.s_inp_props.u4_is_last;
/* Only encode if the current frame is not pre-encode skip */
if(!i4_rc_pre_enc_skip && s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0])
{
isvce_process_ctxt_t *ps_proc = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS];
/* one core is served by the calling thread itself, see isvce_process_thread */
/* call below, so only u4_num_cores - 1 threads are created */
WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1;
ps_codec->ai4_pic_cnt[ctxt_sel] = ps_codec->i4_pic_cnt;
error_status = isvce_svc_au_init(ps_codec, &s_inp_buf);
SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
isvce_nalu_info_au_init(ps_codec->as_nalu_descriptors,
ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers);
#if ENABLE_MODE_STAT_VISUALISER
isvce_msv_get_input_frame(ps_codec->ps_mode_stat_visualiser, &s_inp_buf);
#endif
/* layers are encoded one after the other; threads are created and joined */
/* separately for every layer */
for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
{
isvce_svc_layer_pic_init(ps_codec, &s_inp_buf, i);
/* NOTE(review): the return value of ithread_create is not checked */
for(j = 0; j < num_thread_cnt; j++)
{
ithread_create(ps_codec->apv_proc_thread_handle[j], NULL, isvce_process_thread,
&ps_codec->as_process[j + 1]);
ps_codec->ai4_process_thread_created[j] = 1;
ps_codec->i4_proc_thread_cnt++;
}
/* launch job */
isvce_process_thread(ps_proc);
/* Join threads at the end of encoding a frame */
isvce_join_threads(ps_codec);
/* reset both job queues before the next layer */
ih264_list_reset(ps_codec->pv_proc_jobq);
ih264_list_reset(ps_codec->pv_entropy_jobq);
}
#if ENABLE_MODE_STAT_VISUALISER
isvce_msv_dump_visualisation(ps_codec->ps_mode_stat_visualiser);
#endif
isvce_sub_pic_rc_dump_data(ps_codec->as_process->ps_sub_pic_rc_ctxt);
}
/****************************************************************************
* RECON
* Since we have forward dependent frames, we cannot return recon in
* encoding order. It must be in poc order, or input pic order. To achieve
* this we introduce a delay of 1 to the recon wrt encode. Now since we have
* that delay, at any point minimum of pic_cnt in our ref buffer will be the
* correct frame. For ex let our GOP be IBBP [1 2 3 4] . The encode order
* will be [1 4 2 3] .Now since we have a delay of 1, when we are done with
* encoding 4, the min in the list will be 1. After encoding 2, it will be
* 2, 3 after 3 and 4 after 4. Hence we can return in sequence. Note
* that the 1 delay is critical. Hence if we have post enc skip, we must
* skip here too. Note that since post enc skip already frees the recon
* buffer we need not do any thing here
*
* We need to return a recon whenever we consume an input buffer. This
* consumption includes a pre or post enc skip. Thus dump recon is set for
* all cases except when
* 1) We are still waiting, i.e. ps_codec->i4_pic_cnt is not yet greater
* than ps_codec->s_cfg.u4_num_bframes. An exception is made for the last
* buffer, since the remaining recon needs to be flushed out.
****************************************************************************/
ps_video_encode_op->s_ive_op.dump_recon = 0;
if(ps_codec->s_cfg.u4_enable_recon &&
((ps_codec->i4_pic_cnt > (WORD32) ps_codec->s_cfg.u4_num_bframes) ||
s_inp_buf.s_inp_props.u4_is_last))
{
/* error status */
IH264_ERROR_T ret = IH264_SUCCESS;
svc_au_buf_t *ps_pic_buf = NULL;
/* i4_curr_poc starts at 32768; only entries with a smaller poc can be picked */
WORD32 i4_buf_status, i4_curr_poc = 32768;
/* In case of skips we return recon, but indicate that buffer is zero size
*/
if(ps_codec->s_rate_control.post_encode_skip[ctxt_sel] || i4_rc_pre_enc_skip)
{
ps_video_encode_op->s_ive_op.dump_recon = 1;
ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[0] = 0;
ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[1] = 0;
}
else
{
/* among the used ref set entries whose buffer still has BUF_MGR_IO set, */
/* pick the one with the smallest poc */
for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
{
if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) continue;
i4_buf_status = ih264_buf_mgr_get_status(
ps_codec->pv_ref_buf_mgr, ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
if((i4_buf_status & BUF_MGR_IO) && (ps_codec->as_ref_set[i].i4_poc < i4_curr_poc))
{
ps_pic_buf = ps_codec->as_ref_set[i].ps_pic_buf;
i4_curr_poc = ps_codec->as_ref_set[i].i4_poc;
}
}
ps_video_encode_op->s_ive_op.s_recon_buf = ps_video_encode_ip->s_ive_ip.s_recon_buf;
/*
* If we get a valid buffer. output and free recon.
*
* we may get an invalid buffer if num_b_frames is 0. This is because
* We assume that there will be a ref frame in ref list after encoding
* the last frame. With B frames this is correct since its forward ref
* pic will be in the ref list. But if num_b_frames is 0, we will not
* have a forward ref pic
*/
if(ps_pic_buf)
{
/* the app's recon buffer must match the configured display dimensions; */
/* a mismatch is reported as IH264E_NO_FREE_RECONBUF */
if((ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[Y] !=
ps_codec->s_cfg.u4_disp_wd) ||
(ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_ht[Y] !=
ps_codec->s_cfg.u4_disp_ht))
{
SET_ERROR_ON_RETURN(IH264E_NO_FREE_RECONBUF, IVE_FATALERROR,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
}
/* copy the selected picture into the app's recon buffer */
isvce_fmt_conv(ps_codec, ps_pic_buf,
ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0],
ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1],
ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2],
ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0],
ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1], 0,
ps_codec->s_cfg.u4_disp_ht);
ps_video_encode_op->s_ive_op.dump_recon = 1;
/* the picture has been handed out, drop its BUF_MGR_IO hold */
ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_pic_buf->i4_buf_id,
BUF_MGR_IO);
if(IH264_SUCCESS != ret)
{
SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
}
}
}
}
/***************************************************************************
* Free reference buffers:
* In case of a post enc skip, we have to ensure that those pics will not
* be used as reference anymore. In all other cases we will not even mark
* the ref buffers
***************************************************************************/
if(ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
{
/* pic info */
svc_au_buf_t *ps_cur_pic;
/* mv info */
svc_au_data_t *ps_cur_mv_buf;
/* error status */
IH264_ERROR_T ret = IH264_SUCCESS;
/* Decrement coded pic count */
ps_codec->i4_poc--;
/* loop through the ref set to find the entry that belongs to the current */
/* pic cnt */
/* since the skipped frame may not be on reference list, we may not have an
* MV bank hence free only if we have allocated */
for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
{
if(ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt)
{
ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf;
ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_svc_au_data;
/* release this frame from reference list and recon list */
ret = ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr,
ps_cur_mv_buf->i4_buf_id, BUF_MGR_REF);
ret |= ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr,
ps_cur_mv_buf->i4_buf_id, BUF_MGR_IO);
SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id,
BUF_MGR_REF);
ret |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id,
BUF_MGR_IO);
SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
break;
}
}
}
/*
* Since recon is not in sync with output, ie there can be frame to be
* given back as recon even after last output. Hence we need to mark that
* the output is not the last.
* Hence search through reflist and mark appropriately
*/
if(ps_codec->s_cfg.u4_enable_recon)
{
WORD32 i4_buf_status = 0;
/* OR together the status of every used ref set entry */
for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
{
if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) continue;
i4_buf_status |= ih264_buf_mgr_get_status(
ps_codec->pv_ref_buf_mgr, ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
}
/* some picture still has BUF_MGR_IO set, so this is not the last output */
if(i4_buf_status & BUF_MGR_IO)
{
s_out_buf.u4_is_last = 0;
ps_video_encode_op->s_ive_op.u4_is_last = 0;
}
}
/**************************************************************************
* Signaling to APP
* 1) If we have a valid output mark it so
* 2) Set the codec output ps_video_encode_op
* 3) Set the error status
* 4) Set the return Pic type
* Note that we have already marked recon properly
* 5)Send the consumed input back to app so that it can free it if possible
*
* We will have to return the output and input buffers unconditionally
* so that app can release them
**************************************************************************/
if(!i4_rc_pre_enc_skip && !ps_codec->s_rate_control.post_encode_skip[ctxt_sel] &&
s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0])
{
/* receive output back from codec */
/* NOTE(review): the local s_out_buf is not read again in this function */
s_out_buf = ps_codec->as_out_buf[ctxt_sel];
/* send the output to app */
ps_video_encode_op->s_ive_op.output_present = 1;
ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
/* Set the time stamps of the encoded input */
ps_video_encode_op->s_ive_op.u4_timestamp_low = s_inp_buf.s_inp_props.u4_timestamp_low;
ps_video_encode_op->s_ive_op.u4_timestamp_high = s_inp_buf.s_inp_props.u4_timestamp_high;
/* translate the codec's pic type into the API frame type */
switch(ps_codec->pic_type)
{
case PIC_IDR:
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_IDR_FRAME;
break;
case PIC_I:
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_I_FRAME;
break;
case PIC_P:
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME;
break;
case PIC_B:
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_B_FRAME;
break;
default:
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
break;
}
/* report the first error recorded by any of the process contexts */
for(i = 0; i < (WORD32) ps_codec->s_cfg.u4_num_cores; i++)
{
error_status = ps_codec->as_process[ctxt_sel + i].i4_error_code;
SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
}
}
else
{
/* receive output back from codec */
/* NOTE(review): the local s_out_buf is not read again in this function */
s_out_buf = ps_codec->as_out_buf[ctxt_sel];
ps_video_encode_op->s_ive_op.output_present = 0;
ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
/* No output for this call, so the time stamps are cleared */
ps_video_encode_op->s_ive_op.u4_timestamp_low = 0;
ps_video_encode_op->s_ive_op.u4_timestamp_high = 0;
ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_inp_props.s_raw_buf;
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
}
/* Return the output buffer to the app: start from layer 0 and append the */
/* bytes of the remaining layers directly behind it */
ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].as_bits_buf[0];
for(i = 1; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
{
memmove(((UWORD8 *) ps_video_encode_op->s_ive_op.s_out_buf.pv_buf +
ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes),
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].pv_buf,
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes);
ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes +=
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes;
}
/* NALU info is exported only when it is enabled and a frame was actually */
/* encoded, i.e. under the same condition as output_present above */
if(ps_codec->s_cfg.b_nalu_info_export_enable && !i4_rc_pre_enc_skip &&
!ps_codec->s_rate_control.post_encode_skip[ctxt_sel] &&
s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0])
{
ps_video_encode_op->b_is_nalu_info_present = true;
for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
{
isvce_nalu_info_csv_translator(&ps_codec->as_nalu_descriptors[i],
&ps_video_encode_ip->ps_nalu_info_buf[i]);
ps_video_encode_op->ps_nalu_info_buf[i] = ps_video_encode_ip->ps_nalu_info_buf[i];
}
}
else
{
ps_video_encode_op->b_is_nalu_info_present = false;
}
/* Send the consumed input back to the app so that it can free it if possible */
ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_inp_props.s_raw_buf;
return IV_SUCCESS;
}

View file

@ -0,0 +1,41 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_encode.h
*
* @brief
* Contains functions for encode API
*
*******************************************************************************
*/
#ifndef _ISVCE_ENCODE_H_
#define _ISVCE_ENCODE_H_
#include "ih264_typedefs.h"
#include "iv2.h"
#include "ive2.h"
/**
*******************************************************************************
*
* @brief
* Encode API entry point
*
* @param[in] ps_codec_obj
* Pointer to codec object at API level
*
* @param[in] pv_api_ip
* Pointer to input argument structure
*
* @param[out] pv_api_op
* Pointer to output argument structure
*
* @returns Status
*
*******************************************************************************
*/
extern WORD32 isvce_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op);
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,296 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file
* isvce_encode_header.h
*
* @brief
* This file contains structures and interface prototypes for h264 bitstream
* header encoding
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_ENCODE_HEADER_H_
#define _ISVCE_ENCODE_HEADER_H_
#include "ih264_typedefs.h"
/* Dependencies of ih264e_bitstream.h */
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ih264e_trace.h"
#include "isvce_structs.h"
/**
******************************************************************************
* @brief Macro to put a code with specified number of bits into the
* bitstream
*
* @remarks
* - The result of ih264e_put_bits() is stored in ret_val. When it is not
*   IH264E_SUCCESS the macro executes 'return ret_val', i.e. it returns from
*   the function in which it is used.
* - code_val appears twice in the expansion (trace and write), so arguments
*   with side effects should be avoided.
******************************************************************************
*/
#define PUT_BITS(ps_bitstrm, code_val, code_len, ret_val, syntax_string) \
{ \
ENTROPY_TRACE(syntax_string, code_val); \
ret_val = ih264e_put_bits((ps_bitstrm), (code_val), (code_len)); \
if(ret_val != IH264E_SUCCESS) \
{ \
return ret_val; \
} \
}
/**
******************************************************************************
* @brief Macro to put a code into the bitstream using 0th order
* exponential Golomb encoding for unsigned numbers, ue(v),
* through ih264e_put_uev()
*
* @remarks
* - The result of ih264e_put_uev() is stored in ret_val. When it is not
*   IH264E_SUCCESS the macro executes 'return ret_val', i.e. it returns from
*   the function in which it is used.
* - code_val appears twice in the expansion (trace and write), so arguments
*   with side effects should be avoided.
******************************************************************************
*/
#define PUT_BITS_UEV(ps_bitstrm, code_val, ret_val, syntax_string) \
{ \
ENTROPY_TRACE(syntax_string, code_val); \
ret_val = ih264e_put_uev((ps_bitstrm), (code_val)); \
if(ret_val != IH264E_SUCCESS) \
{ \
return ret_val; \
} \
}
/**
******************************************************************************
* @brief Macro to put a code into the bitstream using 0th order
* exponential Golomb encoding for signed numbers, se(v),
* through ih264e_put_sev()
*
* @remarks
* - The result of ih264e_put_sev() is stored in ret_val. When it is not
*   IH264E_SUCCESS the macro executes 'return ret_val', i.e. it returns from
*   the function in which it is used.
* - code_val appears twice in the expansion (trace and write), so arguments
*   with side effects should be avoided.
******************************************************************************
*/
#define PUT_BITS_SEV(ps_bitstrm, code_val, ret_val, syntax_string) \
{ \
ENTROPY_TRACE(syntax_string, code_val); \
ret_val = ih264e_put_sev((ps_bitstrm), (code_val)); \
if(ret_val != IH264E_SUCCESS) \
{ \
return ret_val; \
} \
}
/**
******************************************************************************
* @brief Macro to set active entropy threads to zero and return
* in case of errors
*
* @remarks
* - When ps_entropy->i4_error_code is not IH264E_SUCCESS, the macro calls
*   DATA_SYNC(), clears ps_codec->au4_entropy_thread_active[ctxt_sel] and
*   returns that error code from the function in which it is used.
* - The expansion is a bare if statement and the arguments are not
*   parenthesised: pass plain identifiers and do not place the macro
*   directly in front of an else.
******************************************************************************
*/
#define RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel) \
if(ps_entropy->i4_error_code != IH264E_SUCCESS) \
{ \
DATA_SYNC(); \
ps_codec->au4_entropy_thread_active[ctxt_sel] = 0; \
return ps_entropy->i4_error_code; \
}
/*****************************************************************************/
/* Extern Function Declarations */
/*****************************************************************************/
/* Header generation helpers with the ih264e_ prefix. They are only declared */
/* here; their definitions are not part of this header.                      */
/* NOTE(review): presumably these are shared with the AVC encoder and emit   */
/* the NAL unit header, VUI, SEI and filler NAL respectively - confirm       */
/* against their definitions                                                 */
extern WORD32 ih264e_generate_nal_unit_header(bitstrm_t *ps_bitstrm, WORD32 nal_unit_type,
WORD32 nal_ref_idc);
extern WORD32 ih264e_generate_vui(bitstrm_t *ps_bitstrm, vui_t *ps_vui);
extern IH264E_ERROR_T ih264e_generate_sei(bitstrm_t *ps_bitstrm, sei_params_t *ps_sei,
UWORD32 u4_insert_per_idr);
extern IH264E_ERROR_T ih264e_add_filler_nal_unit(bitstrm_t *ps_bitstrm, WORD32 insert_fill_bytes);
/**
******************************************************************************
*
* @brief Generates SPS (Sequence Parameter Set)
*
* @par Description
* This function generates Sequence Parameter Set header as per the spec
*
* @param[in] ps_bitstrm
* pointer to bitstream context (handle)
*
* @param[in] ps_sps
* pointer to structure containing SPS data
*
* @param[in] nal_type
* NAL unit type (NOTE(review): presumably the type written into the NAL unit
* header of this parameter set - confirm against the definition)
*
* @return success or failure error code
*
******************************************************************************
*/
WORD32 isvce_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps, NAL_UNIT_TYPE_T nal_type);
/**
******************************************************************************
*
* @brief Generates PPS (Picture Parameter Set)
*
* @par Description
* Generate Picture Parameter Set as per Section 7.3.2.2
*
* @param[in] ps_bitstrm
* pointer to bitstream context (handle)
*
* @param[in] ps_pps
* pointer to structure containing PPS data
*
* @param[in] ps_sps
* pointer to structure containing SPS data (NOTE(review): presumably the SPS
* referred to by this PPS - confirm against the definition)
*
* @return success or failure error code
*
******************************************************************************
*/
WORD32 isvce_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps, sps_t *ps_sps);
/**
******************************************************************************
*
* @brief Generates Slice Header
*
* @par Description
* Generate Slice Header as per Section 7.3.5.1
*
* @param[inout] ps_bitstrm
* pointer to bitstream context for generating slice header
*
* @param[in] ps_slice_hdr
* pointer to slice header params
*
* @param[in] ps_pps
* pointer to pps params referred by slice
*
* @param[in] ps_sps
* pointer to sps params referred by slice
*
* @param[in] u1_idr_flag
* IDR flag (NOTE(review): presumably non-zero when the slice belongs to an
* IDR picture - confirm against the definition)
*
* @return success or failure error code
*
******************************************************************************
*/
WORD32 isvce_generate_slice_header(bitstrm_t *ps_bitstrm, slice_header_t *ps_slice_hdr,
pps_t *ps_pps, sps_t *ps_sps, UWORD8 u1_idr_flag);
/**
******************************************************************************
*
* @brief Populates sps structure
*
* @par Description
* Populates sps structure for its use in header generation
*
* @param[in] ps_codec
* pointer to encoder context
*
* @param[out] ps_sps
* pointer to sps params that needs to be populated
*
* @param[in] u1_sps_id
* sps id (NOTE(review): presumably the id assigned to the populated SPS -
* confirm against the definition)
*
* @param[in] u1_profile_idc
* profile_idc (NOTE(review): presumably the profile signalled in the SPS -
* confirm against the definition)
*
* @param[in] ps_inp_buf
* pointer to input buffer structure
*
* @param[in] u1_spatial_layer_id
* spatial layer id (NOTE(review): presumably selects the layer whose
* properties are used - confirm against the definition)
*
* @return success or failure error code
*
******************************************************************************
*/
IH264E_ERROR_T isvce_populate_sps(isvce_codec_t *ps_codec, sps_t *ps_sps, UWORD8 u1_sps_id,
UWORD8 u1_profile_idc, isvce_inp_buf_t *ps_inp_buf,
UWORD8 u1_spatial_layer_id);
/**
******************************************************************************
*
* @brief Populates pps structure
*
* @par Description
* Populates pps structure for its use in header generation
*
* @param[in] ps_codec
* pointer to encoder context
*
* @param[out] ps_pps
* pointer to pps params that needs to be populated
*
* @param[in] u1_sps_id
* sps id (NOTE(review): presumably the SPS this PPS refers to - confirm
* against the definition)
*
* @param[in] u1_pps_id
* pps id (NOTE(review): presumably the id assigned to the populated PPS -
* confirm against the definition)
*
* @param[in] u1_spatial_layer_id
* spatial layer id (NOTE(review): presumably the layer this PPS is meant
* for - confirm against the definition)
*
* @return success or failure error code
*
******************************************************************************
*/
IH264E_ERROR_T isvce_populate_pps(isvce_codec_t *ps_codec, pps_t *ps_pps, UWORD8 u1_sps_id,
UWORD8 u1_pps_id, UWORD8 u1_spatial_layer_id);
/**
******************************************************************************
*
* @brief Populates slice header structure
*
* @par Description
* Populates slice header structure for its use in header generation
*
* @param[in] ps_proc
* pointer to proc context
*
* @param[out] ps_slice_hdr
* pointer to slice header structure that needs to be populated
*
* @param[in] ps_pps
* pointer to pps params structure referred by the slice
*
* @param[in] ps_sps
* pointer to sps params referred by the pps
*
* @param[in] u1_is_idr
* IDR flag (NOTE(review): presumably non-zero when the slice belongs to an
* IDR picture - confirm against the definition)
*
* @return success or failure error code
*
******************************************************************************
*/
WORD32 isvce_populate_slice_header(isvce_process_ctxt_t *ps_proc, slice_header_t *ps_slice_hdr,
pps_t *ps_pps, sps_t *ps_sps, UWORD8 u1_is_idr);
/* SVC specific populate / generate helpers operating on the SVC NAL unit    */
/* extension, SVC slice header, subset SPS and prefix NAL.                   */
/* NOTE(review): these declarations carry no documentation in this header;   */
/* the summary above is derived from their names and parameter types only.   */
/* Confirm the contracts and the meaning of the WORD32 return values against */
/* the definitions                                                           */
extern WORD32 isvce_populate_svc_nalu_extension(isvce_process_ctxt_t *ps_proc,
svc_nalu_ext_t *ps_svc_nalu_ext,
NAL_UNIT_TYPE_T nalu_type, UWORD8 u1_idr_flag);
extern WORD32 isvce_generate_svc_nalu_extension(bitstrm_t *ps_bitstrm,
svc_nalu_ext_t *ps_svc_nalu_ext, UWORD8 u1_nalu_id);
extern WORD32 isvce_populate_svc_slice(isvce_process_ctxt_t *ps_proc,
svc_slice_header_t *ps_svc_slice_hdr, pps_t *ps_pps,
subset_sps_t *ps_subset_sps,
svc_nalu_ext_t *ps_svc_nalu_ext);
extern WORD32 isvce_populate_subset_sps(isvce_codec_t *ps_codec, subset_sps_t *ps_subset_sps,
UWORD8 u1_sps_id, isvce_inp_buf_t *ps_inp_buf,
UWORD8 u1_spatial_layer_id);
extern WORD32 isvce_generate_prefix_nal(bitstrm_t *ps_bitstrm, svc_nalu_ext_t *ps_svc_nalu_ext,
slice_header_t *ps_slice_header,
UWORD8 u1_max_num_ref_frames, UWORD8 u1_num_spatial_layers);
extern WORD32 isvce_generate_slice_header_svc(bitstrm_t *ps_bitstrm, pps_t *ps_pps,
svc_nalu_ext_t *ps_svc_nalu_ext,
svc_slice_header_t *ps_svc_slice_hdr,
subset_sps_t *ps_subset_sps);
extern WORD32 isvce_generate_subset_sps(bitstrm_t *ps_bitstrm, subset_sps_t *ps_subset_sps);
#endif

70
encoder/svc/isvce_error.h Normal file
View file

@ -0,0 +1,70 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_error.h
*
* @brief
* SVC specific error codes
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_ERROR_H_
#define _ISVCE_ERROR_H_
#include "ih264e_error.h"
/**
* SVC-specific encoder error codes.
* Every value is expressed as an offset from IH264E_CODEC_ERROR_START and the
* codes occupy the contiguous range +0x100 .. +0x107.
* NOTE(review): IH264E_CODEC_ERROR_START is presumably provided by
* ih264e_error.h (included above); keep these offsets clear of any codes
* defined there.
*/
typedef enum ISVCE_ERRORS_T
{
/**Invalid SVC params */
IH264E_INVALID_SVC_PARAMS = IH264E_CODEC_ERROR_START + 0x100,
/**Invalid num_temporal_layers */
IH264E_INVALID_NUM_TEMPORAL_LAYERS = IH264E_CODEC_ERROR_START + 0x101,
/**Invalid num_spatial_layers */
IH264E_INVALID_NUM_SPATIAL_LAYERS = IH264E_CODEC_ERROR_START + 0x102,
/**Invalid spatial_res_ratio */
IH264E_INVALID_SPATIAL_RES_RATIO = IH264E_CODEC_ERROR_START + 0x103,
/** Weighted prediction not supported */
IH264E_WEIGHTED_PRED_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x104,
/** CABAC entropy mode not supported for SVC */
IH264E_CABAC_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x105,
/**Invalid input dimensions */
IH264E_INVALID_SVC_INPUT_DIMENSIONS = IH264E_CODEC_ERROR_START + 0x106,
/** Invalid init QP */
IH264E_INVALID_DYN_INIT_QP = IH264E_CODEC_ERROR_START + 0x107,
} ISVCE_ERRORS_T;
#endif

View file

@ -0,0 +1,145 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_fmt_conv.c
*
* @brief
* Contains functions for format conversion or frame copy of output buffer
*
* @author
* ittiam
*
* @par List of Functions:
* - isvce_fmt_conv()
*
* @remarks
* None
*
*******************************************************************************
*/
#include "ih264_typedefs.h"
#include "ih264_macros.h"
/* Dependencies of ih264_buf_mgr.h */
/* Dependencies of ih264_list.h */
#include "ih264_error.h"
/* Dependencies of ih264_common_tables.h */
#include "ih264_defs.h"
#include "ih264_structs.h"
#include "ih264_buf_mgr.h"
#include "ih264_common_tables.h"
#include "ih264_list.h"
#include "ih264_platform_macros.h"
#include "ih264_trans_data.h"
#include "ih264_size_defs.h"
/* Dependencies of ih264e_cabac_structs.h */
#include "ih264_cabac_tables.h"
/* Dependencies of ime_structs.h */
#include "ime_defs.h"
#include "ime_distortion_metrics.h"
/* Dependencies of ih264e_structs.h */
#include "iv2.h"
#include "ive2.h"
#include "ih264_defs.h"
#include "ih264_deblk_edge_filters.h"
#include "ih264_inter_pred_filters.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
/* Dependencies of ih264e_bitstream.h */
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ih264e_cabac_structs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ime_statistics.h"
#include "ime_structs.h"
/* Dependencies of 'ih264e_utils.h' */
#include "ih264e_defs.h"
#include "ih264e_structs.h"
#include "ih264e_fmt_conv.h"
#include "isvce_structs.h"
/**
*******************************************************************************
*
* @brief Copies / converts rows of one layer of a picture into the caller's
* output planes
*
* @par Description
* The source is the layer with index (u1_num_spatial_layers - 1) of ps_pic.
* Component buffer 0 of that layer is read as luma and component buffer 1 as
* chroma. Rows [cur_row, cur_row + num_rows) are handed to
* ih264e_fmt_conv_420sp_to_420sp() when the configured recon format is
* IV_YUV_420SP_UV or IV_YUV_420SP_VU, and to ih264e_fmt_conv_420sp_to_420p()
* when it is IV_YUV_420P. Any other recon format copies nothing.
*
* @param[in] ps_codec
* Codec context; supplies the display width, the recon colour format and the
* codec colour format
*
* @param[in] ps_pic
* Access-unit buffer holding the per-layer source planes and strides
*
* @param[out] pu1_y_dst
* Start of the destination luma plane
*
* @param[out] pu1_u_dst
* Start of the destination U plane; also used as the chroma plane for the
* semi-planar output path
*
* @param[out] pu1_v_dst
* Start of the destination V plane (used by the 420P path only)
*
* @param[in] u4_dst_y_strd
* Destination luma stride
*
* @param[in] u4_dst_uv_strd
* Destination chroma stride
*
* @param[in] cur_row
* First luma row to process; chroma starts at row cur_row / 2
*
* @param[in] num_rows
* Number of luma rows to process; 0 makes the call a no-op
*
* @returns IH264E_SUCCESS in every case
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_fmt_conv(isvce_codec_t *ps_codec, svc_au_buf_t *ps_pic, UWORD8 *pu1_y_dst,
                              UWORD8 *pu1_u_dst, UWORD8 *pu1_v_dst, UWORD32 u4_dst_y_strd,
                              UWORD32 u4_dst_uv_strd, WORD32 cur_row, WORD32 num_rows)
{
    IH264E_ERROR_T ret = IH264E_SUCCESS;
    UWORD8 *pu1_y_src, *pu1_uv_src;
    UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
    UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
    WORD32 is_u_first;
    UWORD8 *pu1_luma;
    UWORD8 *pu1_chroma;
    WORD32 wd;
    WORD32 src_y_strd;
    WORD32 src_uv_strd;
    WORD32 layer_id = ps_pic->u1_num_spatial_layers - 1;

    /* Nothing to copy */
    if(0 == num_rows)
    {
        return ret;
    }

    pu1_luma = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[0].pv_data;
    pu1_chroma = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[1].pv_data;
    src_y_strd = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[0].i4_data_stride;
    src_uv_strd = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[1].i4_data_stride;

    wd = ps_codec->s_cfg.u4_disp_wd;
    is_u_first = (IV_YUV_420SP_UV == ps_codec->e_codec_color_format) ? 1 : 0;

    /* Advance source and destination to the first row of this call */
    pu1_y_src = pu1_luma + cur_row * src_y_strd;
    pu1_uv_src = pu1_chroma + (cur_row / 2) * src_uv_strd;
    pu1_y_dst_tmp = pu1_y_dst + cur_row * u4_dst_y_strd;
    pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
    pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
    pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * u4_dst_uv_strd;

    /* The source rows are walked with the same layer strides that were used
     * above to locate the first row, so the start offset and the per-row step
     * can never disagree. */
    if((IV_YUV_420SP_UV == ps_codec->s_cfg.e_recon_color_fmt) ||
       (IV_YUV_420SP_VU == ps_codec->s_cfg.e_recon_color_fmt))
    {
        /* Semi-planar output: chroma goes to the plane starting at pu1_u_dst */
        ih264e_fmt_conv_420sp_to_420sp(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp, pu1_uv_dst_tmp, wd,
                                       num_rows, src_y_strd, src_uv_strd, u4_dst_y_strd,
                                       u4_dst_uv_strd);
    }
    else if(IV_YUV_420P == ps_codec->s_cfg.e_recon_color_fmt)
    {
        /* Planar output: chroma is split into separate U and V planes */
        ih264e_fmt_conv_420sp_to_420p(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp, pu1_u_dst_tmp,
                                      pu1_v_dst_tmp, wd, num_rows, src_y_strd, src_uv_strd,
                                      u4_dst_y_strd, u4_dst_uv_strd, is_u_first, 0);
    }

    return (ret);
}

View file

@ -0,0 +1,48 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_fmt_conv.h
*
* @brief
* The file contains extern declarations of color space conversion routines
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_FMT_CONV_H_
#define _ISVCE_FMT_CONV_H_
#include "ih264e_fmt_conv.h"
#include "isvce_structs.h"
/**
* Copies / converts rows [cur_row, cur_row + num_rows) of the layer with index
* (u1_num_spatial_layers - 1) of ps_pic into the destination planes.
* The recon colour format configured in ps_codec selects the output layout:
* for the 420SP formats the chroma is written to the plane starting at
* pu1_u_dst, for 420P it is written to pu1_u_dst and pu1_v_dst.
* A num_rows of 0 is a no-op. The definition returns IH264E_SUCCESS on every
* path.
*/
IH264E_ERROR_T isvce_fmt_conv(isvce_codec_t *ps_codec, svc_au_buf_t *ps_pic, UWORD8 *pu1_y_dst,
UWORD8 *pu1_u_dst, UWORD8 *pu1_v_dst, UWORD32 u4_dst_y_strd,
UWORD32 u4_dst_uv_strd, WORD32 cur_row, WORD32 num_rows);
#endif

View file

@ -0,0 +1,314 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_function_selector_generic.c
*
* @brief
* Contains functions to initialize function pointers of codec context
*
* @author
* ittiam
*
* @par List of Functions:
* - isvce_init_function_ptr_generic
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System Include files */
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* User Include files */
#include "ih264_typedefs.h"
#include "iv2.h"
#include "ive2.h"
#include "isvc_defs.h"
#include "ih264_size_defs.h"
#include "isvce_defs.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
#include "ime_defs.h"
#include "ime_structs.h"
#include "ih264_error.h"
#include "isvc_structs.h"
#include "isvc_trans_quant_itrans_iquant.h"
#include "ih264_inter_pred_filters.h"
#include "isvc_mem_fns.h"
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "isvc_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "isvce_rate_control.h"
#include "isvce_cabac_structs.h"
#include "isvce_structs.h"
#include "ih264e_platform_macros.h"
#include "isvce_cabac.h"
#include "isvce_core_coding.h"
#include "ih264_cavlc_tables.h"
#include "isvce_cavlc.h"
#include "ih264e_intra_modes_eval.h"
#include "ih264e_fmt_conv.h"
#include "ih264e_half_pel.h"
#include "isvce_me.h"
/*****************************************************************************/
/* Function Definitions */
/*****************************************************************************/
/**
*******************************************************************************
*
* @brief Initialize the intra/inter/transform/deblk function pointers of
* codec context
*
* @par Description: installs the generic routines (the ones assigned here
* carry no architecture suffix) into the function-pointer tables of the codec
* context, of its ISA-dependent function sets (encode loop, inter prediction,
* memory helpers) and of the motion-estimation context of every process
* context
*
* @param[in] ps_codec
* Codec context pointer
*
* @returns none
*
* @remarks Every variant slot of the transform/quant tables (2 slots) and of
* the inverse-transform tables (3 slots) receives the same generic routine.
*
*******************************************************************************
*/
void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec)
{
    WORD32 i = 0;
    /* curr proc ctxt */
    isvce_process_ctxt_t *ps_proc = NULL;
    isvce_me_ctxt_t *ps_me_ctxt = NULL;
    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
    enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
    mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;

    /* Init function pointers for intra pred leaf level functions luma
     * Intra 16x16 */
    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert;
    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz;
    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc;
    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane;

    /* Init function pointers for intra pred leaf level functions luma
     * Intra 4x4 */
    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert;
    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz;
    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc;
    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl;
    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr;
    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r;
    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d;
    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l;
    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u;

    /* Init function pointers for intra pred leaf level functions luma
     * Intra 8x8. Slot 1 (horizontal) is populated like every other mode so
     * that no entry of the table is left unassigned. */
    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert;
    ps_codec->apf_intra_pred_8_l[1] = ih264_intra_pred_luma_8x8_mode_horz;
    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc;
    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl;
    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr;
    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r;
    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d;
    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l;
    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u;

    /* Init function pointers for intra pred leaf level functions chroma
     * Intra 8x8 */
    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc;
    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz;
    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert;
    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane;

    /* Init luma forward transform fn ptr.
     * The asserts tie the table lengths to NUM_RESI_TRANS_QUANT_VARIANTS;
     * the assignments below fill slots 0 and 1. */
    ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_8x8) /
            sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0])) ==
           NUM_RESI_TRANS_QUANT_VARIANTS);
    ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_4x4) /
            sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0])) ==
           NUM_RESI_TRANS_QUANT_VARIANTS);
    ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4) /
            sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0])) ==
           NUM_RESI_TRANS_QUANT_VARIANTS);
    ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8;
    ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4;
    ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4;
    ps_enc_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8;
    ps_enc_loop_fxns->apf_resi_trans_quant_4x4[1] = isvc_resi_trans_quant_4x4;
    ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] = isvc_resi_trans_quant_chroma_4x4;
    ps_enc_loop_fxns->pf_hadamard_quant_4x4 = isvc_hadamard_quant_4x4;
    ps_enc_loop_fxns->pf_hadamard_quant_2x2_uv = isvc_hadamard_quant_2x2_uv;

    /* Init inverse transform fn ptr.
     * The asserts tie the table lengths to NUM_IQ_IT_RECON_VARIANTS;
     * the assignments below fill slots 0, 1 and 2. */
    ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8) /
            sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0])) == NUM_IQ_IT_RECON_VARIANTS);
    ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4) /
            sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0])) == NUM_IQ_IT_RECON_VARIANTS);
    ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc) /
            sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0])) ==
           NUM_IQ_IT_RECON_VARIANTS);
    ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4) /
            sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0])) ==
           NUM_IQ_IT_RECON_VARIANTS);
    ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc) /
            sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0])) ==
           NUM_IQ_IT_RECON_VARIANTS);
    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] = isvc_iquant_itrans_recon_4x4;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] = isvc_iquant_itrans_recon_4x4_dc;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] = isvc_iquant_itrans_recon_chroma_4x4;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
        isvc_iquant_itrans_recon_chroma_4x4_dc;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] = isvc_iquant_itrans_recon_4x4;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] = isvc_iquant_itrans_recon_4x4_dc;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] = isvc_iquant_itrans_recon_chroma_4x4;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
        isvc_iquant_itrans_recon_chroma_4x4_dc;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] = isvc_iquant_itrans_recon_chroma_4x4;
    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
        isvc_iquant_itrans_recon_chroma_4x4_dc;
    ps_enc_loop_fxns->pf_zcbf_iquant_itrans_recon_4x4 = isvc_zcbf_iquant_itrans_recon_4x4;
    ps_enc_loop_fxns->pf_chroma_zcbf_iquant_itrans_recon_4x4 =
        isvc_chroma_zcbf_iquant_itrans_recon_4x4;
    ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4;
    ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv;

    /* Init fn ptr luma core coding.
     * NOTE(review): slot 2 is not assigned by this routine - presumably the
     * intra 8x8 luma path, which has no coding routine here; confirm. */
    ps_enc_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16;
    ps_enc_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4;
    ps_enc_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16;

    /* Init fn ptr chroma core coding */
    ps_enc_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8;
    ps_enc_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;

    /* Init fn ptr luma deblocking */
    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4;
    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4;
    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4;
    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4;

    /* Init fn ptr chroma deblocking */
    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4;
    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4;
    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4;
    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4;

    /* write mb syntax layer, indexed by [entropy coding mode][slice type] */
    ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc;
    ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc;
    ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc;
    ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac;
    ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac;
    ps_codec->pf_write_mb_syntax_layer[CABAC][BSLICE] = isvce_write_bslice_mb_cabac;

    /* Padding Functions */
    ps_codec->pf_pad_top = ih264_pad_top;
    ps_codec->pf_pad_bottom = ih264_pad_bottom;
    ps_codec->pf_pad_left_luma = ih264_pad_left_luma;
    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma;
    ps_codec->pf_pad_right_luma = ih264_pad_right_luma;
    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma;

    /* Inter pred leaf level functions */
    ps_inter_pred_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy;
    ps_inter_pred_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz;
    ps_inter_pred_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert;
    ps_inter_pred_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
    ps_inter_pred_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma;

    /* sad me level functions (codec level) */
    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16;
    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8;

    /* memory handling operations */
    ps_mem_fxns->pf_mem_cpy = ih264_memcpy;
    ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8;
    ps_mem_fxns->pf_mem_set = ih264_memset;
    ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8;
    ps_mem_fxns->pf_copy_2d = isvc_copy_2d;
    ps_mem_fxns->pf_memset_2d = isvc_memset_2d;
    ps_mem_fxns->pf_16bit_interleaved_copy = isvc_16bit_interleaved_copy;
    ps_mem_fxns->pf_16bit_interleaved_memset = isvc_16bit_interleaved_memset;
    ps_mem_fxns->pf_nonzero_checker = isvc_is_nonzero_blk;

    /* sad me level functions (one ME context per process context) */
    for(i = 0; i < (MAX_PROCESS_CTXT); i++)
    {
        ps_proc = &ps_codec->as_process[i];
        ps_me_ctxt = &ps_proc->s_me_ctxt;
        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16;
        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8;
        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog;
        ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog;
        ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog;
        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16;
        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter;
    }

    /* intra mode eval -encoder level function */
    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes;
    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes;
    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;

    /* csc */
    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;

    /* Half pel generation function - encoder level */
    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz;
    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert;

    /* ME compute */
    ps_codec->apf_compute_me[PSLICE] = &isvce_compute_me_single_reflist;
    ps_codec->apf_compute_me[BSLICE] = &isvce_compute_me_multi_reflist;

    /* skip decision */
    ps_codec->apf_find_skip_params_me[PSLICE] = &isvce_find_pskip_params_me;
    ps_codec->apf_find_skip_params_me[BSLICE] = &isvce_find_bskip_params_me;
}

View file

@ -0,0 +1,48 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_globals.c
*
* @brief
* Contains definitions of global variables used across the encoder
*
* @author
* ittiam
*
* @par List of functions
*
*
* @remarks
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
#include "ih264_typedefs.h"
#include "ih264_defs.h"
/* Raster to z scan map: the table is indexed by a position in raster order
 * and yields the corresponding z-scan index. The 16 initialisers are
 * consistent with a 4x4 grid visited 2x2 quadrant by 2x2 quadrant
 * (e.g. raster 2 -> z-scan 4, raster 4 -> z-scan 2). */
const UWORD8 gau1_raster_to_zscan_map[MAX_TU_IN_MB] = {0, 1, 4, 5, 2, 3, 6, 7,
8, 9, 12, 13, 10, 11, 14, 15};

View file

@ -0,0 +1,44 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_globals.h
*
* @brief
* Contains declarations of global variables for H264 encoder
*
* @author
* Ittiam
*
* @remarks
*
*******************************************************************************
*/
#ifndef _ISVCE_GLOBALS_H_
#define _ISVCE_GLOBALS_H_
#include "ih264e_globals.h"
/* Lookup table of MAX_TU_IN_MB entries converting a raster-order index into a
 * z-scan index (going by the variable name; NOTE(review): the definition is
 * presumably the one in isvce_globals.c - confirm) */
extern const UWORD8 gau1_raster_to_zscan_map[MAX_TU_IN_MB];
#endif

1378
encoder/svc/isvce_ibl_eval.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,105 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_ibl_eval.h
*
* @brief
* Contains the context structures and function declarations of the SVC
* intra prediction (IBL evaluation) module
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_IBL_EVAL_H_
#define _ISVCE_IBL_EVAL_H_
#include "ih264_typedefs.h"
#include "isvc_macros.h"
#include "ih264_debug.h"
#include "isvc_defs.h"
#include "isvc_structs.h"
#include "isvc_intra_resample.h"
#include "isvce_structs.h"
#include "isvce_structs.h"
/* Sizes of the temporary luma / Cb / Cr buffers.
 * NOTE(review): the unit (elements vs bytes) is not stated here - confirm at
 * the allocation site.
 * NOTE(review): Cb is sized like luma (REF_ARRAY_WIDTH squared) while Cr uses
 * DYADIC_REF_W_C * DYADIC_REF_H_C - confirm the asymmetry is intentional. */
#define TEMP_BUF_SIZE_LUMA (REF_ARRAY_WIDTH * REF_ARRAY_WIDTH)
#define TEMP_BUF_SIZE_CB (REF_ARRAY_WIDTH * REF_ARRAY_WIDTH)
#define TEMP_BUF_SIZE_CR (DYADIC_REF_W_C * DYADIC_REF_H_C)
/* Dimensions of the intermediate buffer: 48 wide, MB_SIZE + 4 high */
#define INTERMEDIATE_BUFF_WIDTH 48
#define INTERMEDIATE_BUFF_HEIGHT (MB_SIZE + 4)
/* Size of the temporary interpolation buffer (width * height of the above) */
#define TEMP_INTERPOLATION_BUF_SIZE (INTERMEDIATE_BUFF_WIDTH * INTERMEDIATE_BUFF_HEIGHT)
/* Structs */
/* 'Constants' member of svc_intra_pred_ctxt_t */
typedef struct intra_pred_constants_t
{
/* Opaque pointer to module state.
 * NOTE(review): presumably an intra_pred_state_t owned by the IBL module - confirm */
void *pv_state;
} intra_pred_constants_t;
/* 'Outputs' member of svc_intra_pred_ctxt_t */
typedef struct intra_pred_outputs_t
{
/* Prediction buffer properties */
yuv_buf_props_t s_pred_buf;
} intra_pred_outputs_t;
/* 'Variables' member of svc_intra_pred_ctxt_t */
typedef struct intra_pred_variables_t
{
/* Inter-layer prediction data (NOTE(review): producer/owner not visible here) */
svc_ilp_data_t *ps_svc_ilp_data;
/* Macroblock position (NOTE(review): presumably in MB units - confirm) */
coordinates_t s_mb_pos;
/* Index of the spatial layer */
UWORD8 u1_spatial_layer_id;
} intra_pred_variables_t;
/* SVC intra prediction context: groups the constants, variables and outputs
 * defined above */
typedef struct svc_intra_pred_ctxt_t
{
intra_pred_constants_t s_intra_pred_constants;
intra_pred_variables_t s_intra_pred_variables;
intra_pred_outputs_t s_intra_pred_outputs;
} svc_intra_pred_ctxt_t;
/*
* Interface of the SVC intra prediction (IBL) module.
* NOTE(review): the definitions are not visible from this header, so the
* summaries below are inferred from the names and signatures only - confirm
* against the implementation.
*/
/* Presumably returns the memory needed by the intra prediction context for the
 * given layer count, resolution ratio and input dimensions */
extern UWORD32 isvce_get_svc_intra_pred_ctxt_size(UWORD8 u1_num_spatial_layers,
DOUBLE d_spatial_res_ratio, UWORD32 u4_wd,
UWORD32 u4_ht);
/* Presumably sets up the intra prediction context inside ps_mem_rec */
extern void isvce_intra_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
/* Presumably records per-MB information for the MB at (i4_mb_x, i4_mb_y).
 * NOTE(review): u1_base_mode_flag carries the 'u1' prefix but is declared
 * WORD8, unlike the UWORD8 'u1' parameters next to it - confirm the intended
 * signedness */
extern void isvce_update_ibl_info(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt,
UWORD8 u1_num_spatial_layers, UWORD8 u1_spatial_layer_id,
UWORD16 u2_mb_type, WORD32 i4_mb_x, WORD32 i4_mb_y,
WORD8 u1_base_mode_flag);
/* Presumably evaluates the IBL mode for the MB currently held by ps_proc */
extern void isvce_evaluate_IBL_mode(isvce_process_ctxt_t *ps_proc);
/* Presumably pads the MB mode buffer of the given spatial layer */
extern void isvce_pad_mb_mode_buf(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt,
UWORD8 u1_spatial_layer_id, UWORD8 u1_num_spatial_layers,
DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht);
#endif

View file

@ -0,0 +1,94 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_ibl_private_defs.h
*
* @brief
* Contains datatype and macro definitions used exclusively in
* SVC intra prediction (IBL)
*
*******************************************************************************
*/
#ifndef _ISVCE_IBL_PRIVATE_DEFS_H_
#define _ISVCE_IBL_PRIVATE_DEFS_H_
#include "ih264_typedefs.h"
#include "isvc_defs.h"
#include "isvc_structs.h"
#include "isvce_structs.h"
#include "isvc_intra_resample.h"
/* Structs */
/* Per-macroblock state of the intra prediction module.
 * NOTE(review): field meanings below are read off the field names; units and
 * ownership are not visible in this header - confirm. */
typedef struct intra_pred_mb_state_t
{
/* Offsets associated with the MB */
coordinates_t s_offsets;
/* Dimensions of the reference array */
coordinates_t s_ref_array_dims;
/* Reference-array x positions */
WORD32 *pi4_ref_array_positions_x;
/* Reference-array y positions */
WORD32 *pi4_ref_array_positions_y;
/* Reference-array phases */
coordinates_t *ps_ref_array_phases;
/* Minimum position */
coordinates_t s_min_pos;
/* Maximum position */
coordinates_t s_max_pos;
} intra_pred_mb_state_t;
/* Per-layer state of the intra prediction module */
typedef struct intra_pred_layer_state_t
{
/* Resampler properties for luma and chroma */
layer_resampler_props_t *ps_luma_props;
layer_resampler_props_t *ps_chroma_props;
/* Per-MB states for luma and chroma */
intra_pred_mb_state_t *ps_luma_mb_states;
intra_pred_mb_state_t *ps_chroma_mb_states;
/* MB mode buffer and its stride */
WORD8 *pi1_mb_mode;
WORD32 i4_mb_mode_stride;
/* buffer to store the reference
layer data before intra sampling */
UWORD8 *pu1_refarray_buffer;
/* Reference-array buffers for Cb and Cr */
UWORD8 *pu1_refarray_cb;
UWORD8 *pu1_refarray_cr;
/* Temporary buffer used during interpolation */
WORD32 *pi4_temp_interpolation_buffer;
} intra_pred_layer_state_t;
/* Top-level state of the intra prediction module */
typedef struct intra_pred_state_t
{
/* Array of size numSpatialLayers */
intra_pred_layer_state_t *ps_layer_state;
} intra_pred_state_t;
#endif

737
encoder/svc/isvce_ilp_mv.c Normal file
View file

@ -0,0 +1,737 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_ilp_mv.c
*
* @brief
* Contains functions used for deriving inter_layer MV's
*
*******************************************************************************
*/
#include <stdint.h>
#include <math.h>
#include <stdbool.h>
#include "ih264_typedefs.h"
#include "ih264_debug.h"
#include "isvc_macros.h"
#include "isvc_defs.h"
#include "isvce_defs.h"
#include "isvce_structs.h"
#include "isvce_ilp_mv_private_defs.h"
#include "isvce_ilp_mv.h"
#include "isvce_ilp_mv_utils.h"
/**
*******************************************************************************
*
* @brief
*  Returns size of buffers for storing ILP MV ctxt
*
* @param[in] u1_num_spatial_layers
*  Num Spatial Layers. When this is 1 or less no memory is needed and 0 is
*  returned
*
* @param[in] d_spatial_res_ratio
*  Resolution Ratio b/w spatial layers
*
* @param[in] u4_wd
*  Input Width
*
* @param[in] u4_ht
*  Input Height
*
* @returns Size of buffers, in bytes
*
* @remarks
*  The per-layer dimensions must be derived exactly as in
*  'isvce_ilp_mv_ctxt_init', which carves up the buffer sized here. Both the
*  width and the height are therefore rounded the same way (add 0.99, then
*  truncate on conversion to WORD32)
*
*******************************************************************************
*/
UWORD32 isvce_get_ilp_mv_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio,
                                   UWORD32 u4_wd, UWORD32 u4_ht)
{
    UWORD32 u4_size = 0;

    if(u1_num_spatial_layers > 1)
    {
        WORD32 i;

        /* One context and one state object per process context */
        u4_size += MAX_PROCESS_CTXT * sizeof(svc_ilp_mv_ctxt_t);
        u4_size += MAX_PROCESS_CTXT * sizeof(ilp_mv_state_t);

        /* Layer states */
        u4_size += u1_num_spatial_layers * sizeof(ilp_mv_layer_state_t);

        /* MB states for every layer other than layer 0 */
        for(i = u1_num_spatial_layers - 1; i >= 1; i--)
        {
            DOUBLE d_layer_scale = pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i);

            /* The 0.99 must be added before the conversion to WORD32; casting */
            /* first would floor the width and could under-count the MBs       */
            WORD32 i4_layer_luma_wd = ((DOUBLE) u4_wd / d_layer_scale) + 0.99;
            WORD32 i4_layer_luma_ht = ((DOUBLE) u4_ht / d_layer_scale) + 0.99;
            WORD32 i4_layer_luma_mbs = (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE);

            u4_size += i4_layer_luma_mbs * sizeof(ilp_mv_mb_state_t);
        }
    }

    return u4_size;
}
/* For every PU slot of the MB at 'ps_mb_pos' (slots are 4 samples apart), derives */
/* the reference layer sample position it maps to, clamped to the reference frame, */
/* and the reference layer MB that contains that position. Results are written to  */
/* 'ps_mb_state->as_pu_positions' and 'ps_mb_state->as_mb_positions'               */
static FORCEINLINE void isvce_ref_layer_pu_and_mb_pos_init(layer_resampler_props_t *ps_layer_props,
                                                           ilp_mv_mb_state_t *ps_mb_state,
                                                           coordinates_t *ps_mb_pos,
                                                           UWORD32 u4_ref_wd, UWORD32 u4_ref_ht,
                                                           UWORD8 u1_field_pic_flag,
                                                           UWORD8 u1_field_mb_flag)
{
    UWORD32 u4_pu_row, u4_pu_col;

    for(u4_pu_row = 0; u4_pu_row < MAX_PU_IN_MB_COL; u4_pu_row++)
    {
        coordinates_t *ps_pu_pos_row = ps_mb_state->as_pu_positions[u4_pu_row];
        coordinates_t *ps_mb_pos_row = ps_mb_state->as_mb_positions[u4_pu_row];

        /* Vertical position in the current layer, then scaled with rounding */
        UWORD32 u4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht +
                        (4 * u4_pu_row + 1) * (1 + u1_field_mb_flag - u1_field_pic_flag);
        UWORD32 u4_y_ref16 =
            (u4_yc * ps_layer_props->u4_scale_y + (1 << (ps_layer_props->u4_shift_y - 1))) >>
            ps_layer_props->u4_shift_y;

        u4_y_ref16 = MIN(u4_y_ref16, u4_ref_ht - 1);

        for(u4_pu_col = 0; u4_pu_col < MAX_PU_IN_MB_ROW; u4_pu_col++)
        {
            /* Horizontal position in the current layer, then scaled with rounding */
            UWORD32 u4_xc =
                ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + 4 * u4_pu_col + 1;
            UWORD32 u4_x_ref16 =
                (u4_xc * ps_layer_props->u4_scale_x + (1 << (ps_layer_props->u4_shift_x - 1))) >>
                ps_layer_props->u4_shift_x;

            u4_x_ref16 = MIN(u4_x_ref16, u4_ref_wd - 1);

            ps_pu_pos_row[u4_pu_col].i4_abscissa = u4_x_ref16;
            ps_pu_pos_row[u4_pu_col].i4_ordinate = u4_y_ref16;

            ps_mb_pos_row[u4_pu_col].i4_abscissa = (u4_x_ref16 / MB_SIZE);
            ps_mb_pos_row[u4_pu_col].i4_ordinate = (u4_y_ref16 / MB_SIZE);
        }
    }
}
/* Initialises one layer's ILP MV state: computes the MV scale factors between the */
/* reference layer and this layer, then fills the reference layer PU and MB        */
/* positions of every MB in the layer. 'ps_layer_state->ps_mb_states' and          */
/* 'ps_layer_state->ps_props' must already be set by the caller                    */
static void isvce_ilp_mv_layer_state_init(ilp_mv_layer_state_t *ps_layer_state,
                                          DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht)
{
    /* Field flags are hard-wired to zero in this function */
    const UWORD8 u1_ref_layer_field_pic_flag = 0;
    const UWORD8 u1_field_pic_flag = 0;
    const UWORD8 u1_field_mb_flag = 0;

    ilp_mv_mb_state_t *ps_mb_states = ps_layer_state->ps_mb_states;
    layer_resampler_props_t *ps_layer_props = ps_layer_state->ps_props;

    /* Dimensions of the reference layer and of the current layer */
    UWORD32 u4_ref_wd = (u4_wd / d_spatial_res_ratio);
    UWORD32 u4_ref_ht = (u4_ht / d_spatial_res_ratio) * (1 + u1_ref_layer_field_pic_flag);
    UWORD32 u4_scaled_wd = u4_wd;
    UWORD32 u4_scaled_ht = u4_ht * (1 + u1_field_pic_flag);

    UWORD32 u4_wd_in_mbs = u4_scaled_wd / ps_layer_props->u4_mb_wd;
    UWORD32 u4_ht_in_mbs = u4_scaled_ht / ps_layer_props->u4_mb_ht;

    UWORD32 u4_mb_x, u4_mb_y;

    /* Ratio of current to reference dimension, scaled up by 2^16 and rounded */
    ps_layer_state->s_mv_scale.i4_abscissa = ((u4_scaled_wd << 16) + (u4_ref_wd >> 1)) / u4_ref_wd;
    ps_layer_state->s_mv_scale.i4_ordinate = ((u4_scaled_ht << 16) + (u4_ref_ht >> 1)) / u4_ref_ht;

    for(u4_mb_y = 0; u4_mb_y < u4_ht_in_mbs; u4_mb_y++)
    {
        ilp_mv_mb_state_t *ps_mb_state_row = ps_mb_states + u4_mb_y * u4_wd_in_mbs;

        for(u4_mb_x = 0; u4_mb_x < u4_wd_in_mbs; u4_mb_x++)
        {
            coordinates_t s_mb_pos = {u4_mb_x, u4_mb_y};

            isvce_ref_layer_pu_and_mb_pos_init(ps_layer_props, &ps_mb_state_row[u4_mb_x],
                                               &s_mb_pos, u4_ref_wd, u4_ref_ht, u1_field_pic_flag,
                                               u1_field_mb_flag);
        }
    }
}
/**
*******************************************************************************
*
* @brief
*  Function to initialize svc ilp buffers
*
* @param[in] ps_codec
*  Pointer to codec context. Every entry of 'as_process' gets its
*  'ps_svc_ilp_mv_ctxt' set; it is set to NULL when there is only one
*  spatial layer
*
* @param[in] ps_mem_rec
*  Pointer to memory allocated for input buffers. 'pv_base' is carved up in
*  the order: per process context {ctxt, state}, with the layer states and
*  the MB states of layers >= 1 placed right after the first process
*  context's state
*
* @remarks
*  Layer states and MB states are allocated and initialised once, for process
*  context 0, and shared by pointer with all other process contexts.
*  The running 'i8_alloc_mem_size' mirrors every advance of the buffer
*  pointer so that the ASSERTs catch any overrun of the size reported by
*  'isvce_get_ilp_mv_ctxt_size'
*
*******************************************************************************
*/
void isvce_ilp_mv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec)
{
    WORD32 i, j;

    const WORD32 i4_num_proc_ctxts = sizeof(ps_codec->as_process) / sizeof(ps_codec->as_process[0]);
    UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;

    if(u1_num_spatial_layers > 1)
    {
        /* Set while handling process context 0, reused by all others */
        ilp_mv_layer_state_t *ps_layer_states = NULL;
        ilp_mv_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS];

        DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio;
        UWORD32 u4_wd = ps_codec->s_cfg.u4_wd;
        UWORD32 u4_ht = ps_codec->s_cfg.u4_ht;
        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
        WORD64 i8_alloc_mem_size =
            isvce_get_ilp_mv_ctxt_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht);

        for(i = 0; i < i4_num_proc_ctxts; i++)
        {
            ilp_mv_state_t *ps_ilp_mv_state;
            svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt;

            isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i;

            ps_ilp_mv_ctxt = ps_proc->ps_svc_ilp_mv_ctxt = (svc_ilp_mv_ctxt_t *) pu1_buf;
            pu1_buf += sizeof(svc_ilp_mv_ctxt_t);
            i8_alloc_mem_size -= sizeof(svc_ilp_mv_ctxt_t);

            ps_ilp_mv_ctxt->s_ilp_mv_constants.pv_state = pu1_buf;
            ps_ilp_mv_state = (ilp_mv_state_t *) pu1_buf;
            pu1_buf += sizeof(ilp_mv_state_t);
            i8_alloc_mem_size -= sizeof(ilp_mv_state_t);

            if(0 == i)
            {
                ps_ilp_mv_state->ps_layer_state = (ilp_mv_layer_state_t *) pu1_buf;
                ps_layer_states = ps_ilp_mv_state->ps_layer_state;
                pu1_buf += u1_num_spatial_layers * sizeof(ps_ilp_mv_state->ps_layer_state[0]);
                i8_alloc_mem_size -=
                    u1_num_spatial_layers * sizeof(ps_ilp_mv_state->ps_layer_state[0]);
            }
            else
            {
                ps_ilp_mv_state->ps_layer_state = ps_layer_states;
            }

            ASSERT(i8_alloc_mem_size >= 0);

            if(0 == i)
            {
                for(j = u1_num_spatial_layers - 1; j >= 1; j--)
                {
                    ilp_mv_layer_state_t *ps_layer = &ps_ilp_mv_state->ps_layer_state[j];

                    WORD32 i4_layer_luma_wd =
                        ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
                        0.99;
                    WORD32 i4_layer_luma_ht =
                        ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
                        0.99;
                    WORD32 i4_layer_luma_mbs =
                        (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE);

                    ps_layer->ps_mb_states = (ilp_mv_mb_state_t *) pu1_buf;
                    aps_luma_mb_states[j] = ps_layer->ps_mb_states;
                    pu1_buf += i4_layer_luma_mbs * sizeof(ps_layer->ps_mb_states[0]);
                    /* Account for exactly the bytes consumed above, i.e. one MB */
                    /* state per MB of the layer                                 */
                    i8_alloc_mem_size -= i4_layer_luma_mbs * sizeof(ps_layer->ps_mb_states[0]);

                    ASSERT(i8_alloc_mem_size >= 0);

                    /* Asserts below verify that
                     * 'ps_codec->s_svc_ilp_data.aps_layer_resampler_props' is initialised
                     */
                    ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j].u4_mb_wd ==
                           MB_SIZE);

                    ps_layer->ps_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j];

                    isvce_ilp_mv_layer_state_init(ps_layer, d_spatial_res_ratio, i4_layer_luma_wd,
                                                  i4_layer_luma_ht);
                }
            }
            else
            {
                /* Share the MB states set up for process context 0 */
                for(j = u1_num_spatial_layers - 1; j >= 1; j--)
                {
                    ilp_mv_layer_state_t *ps_layer = &ps_ilp_mv_state->ps_layer_state[j];

                    ps_layer->ps_mb_states = aps_luma_mb_states[j];
                    ps_layer->ps_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j];
                }
            }
        }
    }
    else
    {
        for(i = 0; i < i4_num_proc_ctxts; i++)
        {
            ps_codec->as_process[i].ps_svc_ilp_mv_ctxt = NULL;
        }
    }
}
static void isvce_get_ilp_mvs_for_me(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt)
{
svc_layer_data_t *ps_ref_layer_data;
ilp_mv_layer_state_t *ps_layer_state;
ilp_mv_mb_state_t *ps_mb_state;
isvce_mb_info_t *ps_ref_mb_info;
coordinates_t s_frame_dims;
coordinates_t s_frame_dims_in_mbs;
coordinates_t s_ref_frame_dims;
coordinates_t s_ref_frame_dims_in_mbs;
bool b_is_mv_non_identical;
WORD32 i, j, k;
ilp_mv_constants_t *ps_ilp_mv_constants = &ps_ilp_mv_ctxt->s_ilp_mv_constants;
ilp_mv_variables_t *ps_ilp_mv_variables = &ps_ilp_mv_ctxt->s_ilp_mv_variables;
ilp_mv_outputs_t *ps_ilp_mv_outputs = &ps_ilp_mv_ctxt->s_ilp_mv_outputs;
ilp_mv_state_t *ps_ilp_mv_state = (ilp_mv_state_t *) ps_ilp_mv_constants->pv_state;
svc_ilp_data_t *ps_svc_ilp_data = ps_ilp_mv_variables->ps_svc_ilp_data;
svc_au_data_t *ps_svc_au_data = ps_svc_ilp_data->ps_svc_au_data;
coordinates_t *ps_mb_pos = &ps_ilp_mv_variables->s_mb_pos;
const isvce_enc_pu_mv_t s_default_mv = {{0, 0}, -1};
UWORD8 u1_spatial_layer_id = ps_ilp_mv_variables->u1_spatial_layer_id;
WORD32 i4_num_ilp_mvs = 0;
s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width;
s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height;
s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE;
s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE;
s_ref_frame_dims.i4_abscissa =
ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_width;
s_ref_frame_dims.i4_ordinate =
ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_height;
s_ref_frame_dims_in_mbs.i4_abscissa = s_ref_frame_dims.i4_abscissa / MB_SIZE;
s_ref_frame_dims_in_mbs.i4_ordinate = s_ref_frame_dims.i4_ordinate / MB_SIZE;
ps_ref_layer_data = &ps_svc_au_data->ps_svc_layer_data[u1_spatial_layer_id - 1];
ps_layer_state = &ps_ilp_mv_state->ps_layer_state[u1_spatial_layer_id];
ps_mb_state =
&ps_layer_state->ps_mb_states[ps_mb_pos->i4_abscissa +
ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa];
for(i = 0; i < MAX_PU_IN_MB_COL; i++)
{
for(j = 0; j < MAX_PU_IN_MB_ROW; j++)
{
b_is_mv_non_identical = true;
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] = s_default_mv;
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] = s_default_mv;
ps_ref_mb_info =
&ps_ref_layer_data->ps_mb_info[ps_mb_state->as_mb_positions[i][j].i4_abscissa +
ps_mb_state->as_mb_positions[i][j].i4_ordinate *
s_ref_frame_dims_in_mbs.i4_abscissa];
if((ps_ref_mb_info->u2_mb_type == P16x16) || (ps_ref_mb_info->u2_mb_type == B16x16))
{
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] =
ps_ref_mb_info->u2_mb_type;
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] =
ps_ref_mb_info->as_pu->u1_pred_mode;
if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L0)
{
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] =
ps_ref_mb_info->as_pu->as_me_info[L1];
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx =
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx *
ps_layer_state->s_mv_scale.i4_abscissa +
32768) >>
16;
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy =
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy *
ps_layer_state->s_mv_scale.i4_ordinate +
32768) >>
16;
}
if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L1)
{
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] =
ps_ref_mb_info->as_pu->as_me_info[L0];
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx =
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx *
ps_layer_state->s_mv_scale.i4_abscissa +
32768) >>
16;
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy =
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy *
ps_layer_state->s_mv_scale.i4_ordinate +
32768) >>
16;
}
if(i4_num_ilp_mvs == 0)
{
i4_num_ilp_mvs++;
}
else
{
for(k = i4_num_ilp_mvs - 1; k >= 0; k--)
{
if((ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[k] ==
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs]) &&
(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k] ==
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs]) &&
isvce_check_identical_mv(
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[k],
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs],
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k]))
{
b_is_mv_non_identical = false;
}
}
if(b_is_mv_non_identical)
{
i4_num_ilp_mvs++;
}
}
}
else
{
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] = INVALID_MB_TYPE;
}
}
}
ps_ilp_mv_outputs->s_ilp_me_cands.u4_num_ilp_mvs = i4_num_ilp_mvs;
for(i = 0; i < MAX_ILP_MV_IN_NBR_RGN; i++)
{
b_is_mv_non_identical = true;
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] = s_default_mv;
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] = s_default_mv;
if(ps_mb_pos->i4_abscissa + gai1_nbr_ilp_mv_map[i][0] >= 0 &&
ps_mb_pos->i4_abscissa + gai1_nbr_ilp_mv_map[i][0] < s_frame_dims_in_mbs.i4_abscissa &&
ps_mb_pos->i4_ordinate + gai1_nbr_ilp_mv_map[i][1] >= 0 &&
ps_mb_pos->i4_ordinate + gai1_nbr_ilp_mv_map[i][1] < s_frame_dims_in_mbs.i4_ordinate)
{
ps_mb_state =
&ps_layer_state->ps_mb_states[(ps_mb_pos->i4_abscissa + gai1_nbr_ilp_mv_map[i][0]) +
(ps_mb_pos->i4_ordinate + gai1_nbr_ilp_mv_map[i][1]) *
s_frame_dims_in_mbs.i4_abscissa];
ps_ref_mb_info =
&ps_ref_layer_data->ps_mb_info[(ps_mb_state
->as_mb_positions[gai1_nbr_ilp_mv_map[i][2]]
[gai1_nbr_ilp_mv_map[i][3]]
.i4_abscissa) +
ps_mb_state
->as_mb_positions[gai1_nbr_ilp_mv_map[i][2]]
[gai1_nbr_ilp_mv_map[i][3]]
.i4_ordinate *
s_ref_frame_dims_in_mbs.i4_abscissa];
if((ps_ref_mb_info->u2_mb_type == P16x16) || (ps_ref_mb_info->u2_mb_type == B16x16))
{
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] =
ps_ref_mb_info->u2_mb_type;
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] =
ps_ref_mb_info->as_pu->u1_pred_mode;
if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L0)
{
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] =
ps_ref_mb_info->as_pu->as_me_info[L1];
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx =
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx *
ps_layer_state->s_mv_scale.i4_abscissa +
32768) >>
16;
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy =
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy *
ps_layer_state->s_mv_scale.i4_ordinate +
32768) >>
16;
}
if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L1)
{
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] =
ps_ref_mb_info->as_pu->as_me_info[L0];
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx =
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx *
ps_layer_state->s_mv_scale.i4_abscissa +
32768) >>
16;
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy =
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy *
ps_layer_state->s_mv_scale.i4_ordinate +
32768) >>
16;
}
if(i4_num_ilp_mvs == 0)
{
i4_num_ilp_mvs++;
}
else
{
for(k = i4_num_ilp_mvs - 1; k >= 0; k--)
{
if((ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[k] ==
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs]) &&
(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k] ==
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs]) &&
isvce_check_identical_mv(
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[k],
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs],
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k]))
b_is_mv_non_identical = false;
}
if(b_is_mv_non_identical)
{
i4_num_ilp_mvs++;
}
}
}
else
{
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] = INVALID_MB_TYPE;
}
}
}
ps_ilp_mv_outputs->s_ilp_me_cands.u4_num_ilp_mvs_incl_nbrs = i4_num_ilp_mvs;
}
/* Derives the inter layer MV of the current MB into 's_ilp_mv_outputs.s_ilp_mv'    */
/* and, through 'isvce_get_ilp_mvs_for_me', the ME candidates. The MV is taken from */
/* the reference MB mapped to PU slot [0][0] and is retained only when that MB is   */
/* P16x16 or B16x16 and the reference MBs mapped to every PU slot agree with it on  */
/* MB type, prediction mode and MVs. A retained MV is scaled to the current layer   */
void isvce_get_mb_ilp_mv(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt)
{
    WORD32 i4_pu_row, i4_pu_col;
    isvce_mb_info_t *ps_ref_mb_info;

    ilp_mv_variables_t *ps_ilp_mv_variables = &ps_ilp_mv_ctxt->s_ilp_mv_variables;
    ilp_mv_t *ps_ilp_mv = &ps_ilp_mv_ctxt->s_ilp_mv_outputs.s_ilp_mv;
    ilp_mv_state_t *ps_ilp_mv_state =
        (ilp_mv_state_t *) ps_ilp_mv_ctxt->s_ilp_mv_constants.pv_state;
    svc_ilp_data_t *ps_svc_ilp_data = ps_ilp_mv_variables->ps_svc_ilp_data;
    svc_au_data_t *ps_svc_au_data = ps_svc_ilp_data->ps_svc_au_data;
    coordinates_t *ps_mb_pos = &ps_ilp_mv_variables->s_mb_pos;
    UWORD8 u1_spatial_layer_id = ps_ilp_mv_variables->u1_spatial_layer_id;

    const isvce_enc_pu_mv_t s_default_mv = {{0, 0}, -1};

    /* Frame widths are taken from the residual buffers of the two layers */
    WORD32 i4_frame_wd = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width;
    WORD32 i4_ref_frame_wd = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_width;
    WORD32 i4_frame_wd_in_mbs = i4_frame_wd / MB_SIZE;
    WORD32 i4_ref_frame_wd_in_mbs = i4_ref_frame_wd / MB_SIZE;

    svc_layer_data_t *ps_ref_layer_data =
        &ps_svc_au_data->ps_svc_layer_data[u1_spatial_layer_id - 1];
    ilp_mv_layer_state_t *ps_layer_state = &ps_ilp_mv_state->ps_layer_state[u1_spatial_layer_id];
    ilp_mv_mb_state_t *ps_mb_state =
        &ps_layer_state
             ->ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * i4_frame_wd_in_mbs];
    coordinates_t *ps_mv_scale = &ps_layer_state->s_mv_scale;

    /* Seed the output from the reference MB mapped to PU slot [0][0] */
    ps_ilp_mv->as_mv[0][L0] = s_default_mv;
    ps_ilp_mv->as_mv[0][L1] = s_default_mv;

    ps_ref_mb_info =
        &ps_ref_layer_data->ps_mb_info[ps_mb_state->as_mb_positions[0][0].i4_abscissa +
                                       ps_mb_state->as_mb_positions[0][0].i4_ordinate *
                                           i4_ref_frame_wd_in_mbs];

    if((ps_ref_mb_info->u2_mb_type == P16x16) || (ps_ref_mb_info->u2_mb_type == B16x16))
    {
        ps_ilp_mv->e_mb_type = ps_ref_mb_info->u2_mb_type;
        ps_ilp_mv->ae_pred_mode[0] = ps_ref_mb_info->as_pu->u1_pred_mode;

        if(ps_ilp_mv->ae_pred_mode[0] != L0)
        {
            ps_ilp_mv->as_mv[0][L1] = ps_ref_mb_info->as_pu->as_me_info[L1];
        }

        if(ps_ilp_mv->ae_pred_mode[0] != L1)
        {
            ps_ilp_mv->as_mv[0][L0] = ps_ref_mb_info->as_pu->as_me_info[L0];
        }
    }
    else
    {
        ps_ilp_mv->e_mb_type = INVALID_MB_TYPE;
    }

    /* Function call to get non 16x16 ilp mvs for me candidates */
    isvce_get_ilp_mvs_for_me(ps_ilp_mv_ctxt);

    /* Encoder supports only 16x16 partition. */
    /* The loop below ensures only 16x16 ILP MV's are used */
    for(i4_pu_row = 0; i4_pu_row < MAX_PU_IN_MB_COL; i4_pu_row++)
    {
        for(i4_pu_col = 0; i4_pu_col < MAX_PU_IN_MB_ROW; i4_pu_col++)
        {
            coordinates_t *ps_ref_mb_pos = &ps_mb_state->as_mb_positions[i4_pu_row][i4_pu_col];

            ps_ref_mb_info =
                &ps_ref_layer_data->ps_mb_info[ps_ref_mb_pos->i4_abscissa +
                                               ps_ref_mb_pos->i4_ordinate * i4_ref_frame_wd_in_mbs];

            if((ps_ref_mb_info->u2_mb_type != ps_ilp_mv->e_mb_type) ||
               (ps_ilp_mv->ae_pred_mode[0] != ps_ref_mb_info->as_pu->u1_pred_mode) ||
               !isvce_check_identical_mv(ps_ilp_mv->as_mv[0], ps_ref_mb_info->as_pu->as_me_info,
                                         ps_ilp_mv->ae_pred_mode[0]))
            {
                /* NOTE(review): this exit leaves 'ae_pred_mode[0]' as is, whereas the */
                /* INVALID_MB_TYPE path at the end of the function also sets it to     */
                /* INVALID_PRED_MODE - confirm that this difference is intended        */
                ps_ilp_mv->as_mv[0][L0] = s_default_mv;
                ps_ilp_mv->as_mv[0][L1] = s_default_mv;
                ps_ilp_mv->e_mb_type = INVALID_MB_TYPE;

                return;
            }
        }
    }

    if(ps_ilp_mv->e_mb_type == INVALID_MB_TYPE)
    {
        ps_ilp_mv->e_mb_type = INVALID_MB_TYPE;
        ps_ilp_mv->ae_pred_mode[0] = INVALID_PRED_MODE;

        return;
    }

    /* Scale the retained MVs: multiply by the layer scale, add 32768, shift by 16 */
    if(ps_ilp_mv->ae_pred_mode[0] != L0)
    {
        isvce_enc_pu_mv_t *ps_mv_l1 = &ps_ilp_mv->as_mv[0][L1];

        ps_mv_l1->s_mv.i2_mvx =
            (ps_mv_l1->s_mv.i2_mvx * ps_mv_scale->i4_abscissa + 32768) >> 16;
        ps_mv_l1->s_mv.i2_mvy =
            (ps_mv_l1->s_mv.i2_mvy * ps_mv_scale->i4_ordinate + 32768) >> 16;
    }

    if(ps_ilp_mv->ae_pred_mode[0] != L1)
    {
        isvce_enc_pu_mv_t *ps_mv_l0 = &ps_ilp_mv->as_mv[0][L0];

        ps_mv_l0->s_mv.i2_mvx =
            (ps_mv_l0->s_mv.i2_mvx * ps_mv_scale->i4_abscissa + 32768) >> 16;
        ps_mv_l0->s_mv.i2_mvy =
            (ps_mv_l0->s_mv.i2_mvy * ps_mv_scale->i4_ordinate + 32768) >> 16;
    }
}
/* Chooses, per reference list, which MV predictor the MB signals in 'au1_mvp_idx': */
/* 0 for the spatial MVP, 1 for the ILP MVP. Index 1 is chosen only when the MVD    */
/* cost against the ILP MVP is strictly lower than the cost against the spatial     */
/* MVP. A predictor with ref idx -1 is not usable, and the ILP MVP must also have   */
/* the same ref idx as the MB's MV. Both indices are 0 when ILP MVP usage is        */
/* disabled, 'ps_ilp_mvp' is NULL, or the MB is intra, PSKIP, BSKIP or BASE_MODE.   */
/* 'pu1_mvd_costs' is indexed by the signed MV component difference                 */
void isvce_mvp_idx_eval(isvce_mb_info_t *ps_mb_info, isvce_enc_pu_mv_t *ps_spatial_mvp,
                        isvce_enc_pu_mv_t *ps_ilp_mvp, UWORD8 *pu1_mvd_costs)
{
    WORD32 i4_dir;

    if(!(USE_ILP_MV_AS_MVP) || !ps_ilp_mvp || ps_mb_info->u1_is_intra ||
       (ps_mb_info->u2_mb_type == PSKIP) || (ps_mb_info->u2_mb_type == BSKIP) ||
       (ps_mb_info->u2_mb_type == BASE_MODE))
    {
        ps_mb_info->as_pu->au1_mvp_idx[L0] = 0;
        ps_mb_info->as_pu->au1_mvp_idx[L1] = 0;

        return;
    }

    for(i4_dir = 0; i4_dir < NUM_PRED_DIRS; i4_dir++)
    {
        PRED_MODE_T e_pred_mode = (PRED_MODE_T) i4_dir;
        PRED_MODE_T e_list = (e_pred_mode == L0) ? L1 : L0;

        if(ps_mb_info->as_pu->u1_pred_mode == e_pred_mode)
        {
            /* List 'e_list' is not evaluated for this prediction mode */
            ps_mb_info->as_pu->au1_mvp_idx[e_list] = 0;
        }
        else
        {
            isvce_enc_pu_mv_t *ps_mv = &ps_mb_info->as_pu->as_me_info[e_list];
            isvce_enc_pu_mv_t *ps_spatial = &ps_spatial_mvp[e_list];
            isvce_enc_pu_mv_t *ps_ilp = &ps_ilp_mvp[e_list];

            WORD32 i4_spatial_cost = INT32_MAX;
            WORD32 i4_ilp_cost = INT32_MAX;

            if(ps_spatial->i1_ref_idx != -1)
            {
                i4_spatial_cost = pu1_mvd_costs[ps_mv->s_mv.i2_mvx - ps_spatial->s_mv.i2_mvx] +
                                  pu1_mvd_costs[ps_mv->s_mv.i2_mvy - ps_spatial->s_mv.i2_mvy];
            }

            if((ps_ilp->i1_ref_idx != -1) && (ps_mv->i1_ref_idx == ps_ilp->i1_ref_idx))
            {
                i4_ilp_cost = pu1_mvd_costs[ps_mv->s_mv.i2_mvx - ps_ilp->s_mv.i2_mvx] +
                              pu1_mvd_costs[ps_mv->s_mv.i2_mvy - ps_ilp->s_mv.i2_mvy];
            }

            ps_mb_info->as_pu->au1_mvp_idx[e_list] = i4_spatial_cost > i4_ilp_cost;
        }
    }
}

115
encoder/svc/isvce_ilp_mv.h Normal file
View file

@ -0,0 +1,115 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_ilp_mv.h
*
* @brief
* Contains declarations for the functions defined in
* isvce_ilp_mv.c
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_ILP_MV_H_
#define _ISVCE_ILP_MV_H_
#include "ih264_typedefs.h"
#include "iv2.h"
#include "isvc_macros.h"
#include "ih264_debug.h"
#include "isvc_defs.h"
#include "isvc_structs.h"
#include "isvce_defs.h"
#include "isvce_pred_structs.h"
#include "isvce_structs.h"
#include "isvce_structs.h"
#include "isvce_utils.h"
/* Structs */
/* Members of the ILP MV context that are set once, by isvce_ilp_mv_ctxt_init */
typedef struct ilp_mv_constants_t
{
/* Opaque pointer to the module's private state; isvce_ilp_mv.c casts it */
/* to 'ilp_mv_state_t *'                                                  */
void *pv_state;
} ilp_mv_constants_t;
/* Results written by isvce_get_mb_ilp_mv for the current MB */
typedef struct ilp_mv_outputs_t
{
/* Inter layer MV of the MB; 'e_mb_type' is INVALID_MB_TYPE when none is usable */
ilp_mv_t s_ilp_mv;
/* Inter layer MV candidates gathered for ME */
ilp_me_cands_t s_ilp_me_cands;
} ilp_mv_outputs_t;
/* Inputs read by isvce_get_mb_ilp_mv; presumably set by the caller for */
/* every MB - TODO confirm                                              */
typedef struct ilp_mv_variables_t
{
/* Inter layer data; the AU layer data and residual buffer dimensions are read from it */
svc_ilp_data_t *ps_svc_ilp_data;
/* Position of the current MB, in MB units */
coordinates_t s_mb_pos;
/* Spatial layer of the current MB; layer 'id - 1' is used as the reference layer */
UWORD8 u1_spatial_layer_id;
} ilp_mv_variables_t;
/* ILP MV context; isvce_ilp_mv_ctxt_init creates one per process context */
typedef struct svc_ilp_mv_ctxt_t
{
/* Set during init */
ilp_mv_constants_t s_ilp_mv_constants;
/* Inputs for the current MB */
ilp_mv_variables_t s_ilp_mv_variables;
/* Outputs for the current MB */
ilp_mv_outputs_t s_ilp_mv_outputs;
} svc_ilp_mv_ctxt_t;
/* Function declarations */
/* Returns the size, in bytes, of the memory needed for the ILP MV contexts; */
/* 0 when there is a single spatial layer                                    */
extern UWORD32 isvce_get_ilp_mv_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio,
UWORD32 u4_wd, UWORD32 u4_ht);
/* Carves 'ps_mem_rec->pv_base' into the ILP MV contexts of all process contexts */
extern void isvce_ilp_mv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
/* Derives the inter layer MV and the ME candidates for the MB described by */
/* 's_ilp_mv_variables', and writes them to 's_ilp_mv_outputs'              */
extern void isvce_get_mb_ilp_mv(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt);
/* Sets 'au1_mvp_idx' of the MB's PU for both lists: 0 selects the spatial MVP, */
/* 1 selects the ILP MVP                                                        */
extern void isvce_mvp_idx_eval(isvce_mb_info_t *ps_mb_info, isvce_enc_pu_mv_t *ps_spatial_mvp,
isvce_enc_pu_mv_t *ps_ilp_mvp, UWORD8 *pu1_mvd_costs);
/* Returns non-zero when the MB ended up with exactly the inter layer MV, i.e. the */
/* MB type and prediction mode match those of 'ps_ilp_mv' and                      */
/* isvce_check_identical_mv reports the MVs as identical. Returns 0 when ILP MV    */
/* is disabled, 'ps_ilp_mv' is NULL, or the MB is PSKIP or BSKIP                   */
static FORCEINLINE UWORD8 isvce_is_ilp_mv_winning_mv(isvce_mb_info_t *ps_mb_info,
                                                     ilp_mv_t *ps_ilp_mv)
{
    if(!(ENABLE_ILP_MV) || !ps_ilp_mv)
    {
        return 0;
    }

    if((ps_mb_info->u2_mb_type == PSKIP) || (ps_mb_info->u2_mb_type == BSKIP))
    {
        return 0;
    }

    if(ps_mb_info->u2_mb_type != ps_ilp_mv->e_mb_type)
    {
        return 0;
    }

    if(((PRED_MODE_T) ps_mb_info->as_pu->u1_pred_mode) != ps_ilp_mv->ae_pred_mode[0])
    {
        return 0;
    }

    return isvce_check_identical_mv(ps_mb_info->as_pu->as_me_info, ps_ilp_mv->as_mv[0],
                                    ps_ilp_mv->ae_pred_mode[0]);
}
#endif

View file

@ -0,0 +1,68 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_ilp_mv_private_defs.h
*
* @brief
* Contains datatype and macro definitions used exclusively in
* ILP MV derivations
*
*******************************************************************************
*/
#ifndef _ISVCE_ILP_MV_PRIVATE_DEFS_H_
#define _ISVCE_ILP_MV_PRIVATE_DEFS_H_
#include "ih264_typedefs.h"
#include "isvc_defs.h"
#include "isvc_structs.h"
#include "isvce_structs.h"
/* Structs */
/* Offsets, etc used for resLayer MV upsampling */
/* Derived as per 'G.8.6.1.1' for all MB's once during init */
/* Both arrays are indexed as [PU row][PU column] by the code that fills them */
/* in isvce_ilp_mv.c                                                          */
typedef struct ilp_mv_mb_state_t
{
/* Reference layer sample position mapped to each PU slot of the MB */
coordinates_t as_pu_positions[MAX_PU_IN_MB_COL][MAX_PU_IN_MB_ROW];
/* The same positions divided by MB_SIZE, i.e. the reference layer MB containing them */
coordinates_t as_mb_positions[MAX_PU_IN_MB_COL][MAX_PU_IN_MB_ROW];
} ilp_mv_mb_state_t;
/* ILP MV state of one spatial layer */
typedef struct ilp_mv_layer_state_t
{
/* Luma resampler properties of the layer; points into the codec's inter layer data */
layer_resampler_props_t *ps_props;
/* One entry per MB of the layer, in raster order */
ilp_mv_mb_state_t *ps_mb_states;
/* MV scale factors from the reference layer to this layer, scaled up by 2^16 */
coordinates_t s_mv_scale;
} ilp_mv_layer_state_t;
/* Private state behind 'ilp_mv_constants_t::pv_state' */
typedef struct ilp_mv_state_t
{
/* Array of size numSpatialLayers */
ilp_mv_layer_state_t *ps_layer_state;
} ilp_mv_state_t;
#endif

View file

@ -0,0 +1,111 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_ilp_mv_utils.h
*
* @brief
* Defs to perform experiments in ilp mv
*
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_ILP_MV_UTILS_H_
#define _ISVCE_ILP_MV_UTILS_H_
#include <stdbool.h>
#include "ih264_typedefs.h"
#include "isvc_defs.h"
#include "isvc_macros.h"
#include "isvce_pred_structs.h"
#include "isvce_structs.h"
/* Candidate limits; going by the names they apply when fewer than two, or */
/* at least two, ILP MVs are available - TODO confirm against the users    */
#define MAX_CAND_IF_NUM_ILP_MV_LT_2 8
#define MAX_CAND_IF_NUM_ILP_MV_GTEQ_2 6
/* nbr_mb.x, nbr_mb.y, pu_pos.x, pu_pos.y */
#define NBR_PU_AND_MB_POS 4
/* One row per neighbouring MB used for ILP MV candidates. Columns 0 and 1 are */
/* the MB offsets added to the current MB position; columns 2 and 3 select the */
/* PU slot of that neighbour.                                                  */
/* NOTE(review): isvce_ilp_mv.c indexes 'as_mb_positions[col 2][col 3]', and   */
/* that array is filled as [PU row][PU column], so column 2 acts as the row    */
/* index there - confirm this matches the 'pu_pos.x, pu_pos.y' order above     */
static const WORD8 gai1_nbr_ilp_mv_map[MAX_ILP_MV_IN_NBR_RGN][NBR_PU_AND_MB_POS] = {
/* MB at x - 1 */
{-1, 0, 3, 0},
/* MB at y - 1 */
{0, -1, 0, 3},
/* MB at x + 1 */
{1, 0, 0, 0},
/* MB at y + 1 */
{0, 1, 0, 0},
};
/**
*******************************************************************************
*
* @brief
*  Checks whether all ILP MV candidates of the current MB lie close together
*  in the given reference list
*
* @param[in] ps_ilp_me_cands
*  Pointer to ilp_me_cands; only the first 'u4_num_ilp_mvs' candidates are
*  examined
*
* @param[in] i4_reflist
*  Reference list to examine; also compared against the candidates'
*  prediction modes
*
* @returns false as soon as a pair of candidates is found where either
*  candidate's prediction mode is neither 'i4_reflist' nor BI, or where the
*  absolute difference of the x or of the y MV components is 4 or more.
*  Returns true otherwise, which includes the case of fewer than two
*  candidates
*
* @remarks none
*
*******************************************************************************
*/
static FORCEINLINE bool isvce_check_max_mv_diff_lt_4(ilp_me_cands_t *ps_ilp_me_cands,
                                                     WORD32 i4_reflist)
{
    UWORD32 u4_cur, u4_prev;
    UWORD32 u4_mv_diff_x, u4_mv_diff_y;

    const PRED_MODE_T e_list_mode = (PRED_MODE_T) i4_reflist;

    for(u4_cur = 1; u4_cur < ps_ilp_me_cands->u4_num_ilp_mvs; u4_cur++)
    {
        const isvce_enc_pu_mv_t *ps_cur_mv = &ps_ilp_me_cands->as_mv[u4_cur][i4_reflist];
        bool b_cur_uses_list = (ps_ilp_me_cands->ae_pred_mode[u4_cur] == e_list_mode) ||
                               (ps_ilp_me_cands->ae_pred_mode[u4_cur] == BI);

        /* Compare against every earlier candidate */
        for(u4_prev = 0; u4_prev < u4_cur; u4_prev++)
        {
            const isvce_enc_pu_mv_t *ps_prev_mv = &ps_ilp_me_cands->as_mv[u4_prev][i4_reflist];
            bool b_prev_uses_list = (ps_ilp_me_cands->ae_pred_mode[u4_prev] == e_list_mode) ||
                                    (ps_ilp_me_cands->ae_pred_mode[u4_prev] == BI);

            if(!b_cur_uses_list || !b_prev_uses_list)
            {
                return false;
            }

            u4_mv_diff_x = ABS(ps_cur_mv->s_mv.i2_mvx - ps_prev_mv->s_mv.i2_mvx);
            u4_mv_diff_y = ABS(ps_cur_mv->s_mv.i2_mvy - ps_prev_mv->s_mv.i2_mvy);

            if(u4_mv_diff_x >= 4 || u4_mv_diff_y >= 4)
            {
                return false;
            }
        }
    }

    return true;
}
#endif

View file

@ -0,0 +1,116 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_interface_structs.h
*
* @brief
* Contains struct definition used for interface objects such as input,
* output, and rec
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_INTERFACE_STRUCTS_H_
#define _ISVCE_INTERFACE_STRUCTS_H_
#include "isvc_structs.h"
/** Input object: one raw picture together with its timestamp and optional side info */
typedef struct isvce_raw_inp_buf_t
{
/** Descriptor of raw buffer */
iv_raw_buf_t s_raw_buf;
/** Lower 32bits of time stamp corresponding to the above buffer */
UWORD32 u4_timestamp_low;
/** Upper 32bits of time stamp corresponding to the above buffer */
UWORD32 u4_timestamp_high;
/** Flag to indicate if the current buffer is last buffer */
UWORD32 u4_is_last;
/** Type of mb info sent along with input buffer; zero presumably means none */
UWORD32 u4_mb_info_type;
/** Size of mb info structure */
UWORD32 u4_mb_info_size;
/** Buffer containing mb info if u4_mb_info_type is non-zero */
void *pv_mb_info;
/** Type of pic info sent along with input buffer; zero presumably means none */
UWORD32 u4_pic_info_type;
/** Buffer containing pic info; presumably valid if u4_pic_info_type is non-zero */
void *pv_pic_info;
/** SEI CCV params flag */
UWORD8 u1_sei_ccv_params_present_flag;
/** SEI CCV params info */
sei_ccv_params_t s_sei_ccv;
} isvce_raw_inp_buf_t;
/**
* Output buffer descriptor: the bitstream buffers of one output together with
* its time stamp and the last-buffer flag.
*/
typedef struct
{
/** Descriptors of bitstream buffers; the array holds MAX_NUM_SPATIAL_LAYERS
* entries (presumably one per spatial layer - confirm indexing with users) */
iv_bits_buf_t as_bits_buf[MAX_NUM_SPATIAL_LAYERS];
/** Lower 32bits of time stamp corresponding to the above buffers */
UWORD32 u4_timestamp_low;
/** Upper 32bits of time stamp corresponding to the above buffers */
UWORD32 u4_timestamp_high;
/** Flag to indicate if the current buffer is last buffer */
UWORD32 u4_is_last;
} isvce_out_buf_t;
/**
* Recon ("rec") buffer descriptor: a picture buffer together with its time
* stamp, the last-buffer flag and the picture count.
*/
typedef struct
{
/** Descriptor of picture buffer */
svc_au_buf_t s_pic_buf;
/** Lower 32bits of time stamp corresponding to the above buffer */
UWORD32 u4_timestamp_low;
/** Upper 32bits of time stamp corresponding to the above buffer */
UWORD32 u4_timestamp_high;
/** Flag to indicate if the current buffer is last buffer */
UWORD32 u4_is_last;
/** Picture count corresponding to current picture */
WORD32 i4_pic_cnt;
} isvce_rec_buf_t;
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,361 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_intra_modes_eval.h
*
* @brief
* This file contains declarations of routines that perform rate distortion
* analysis on a macroblock if coded as intra.
*
* @author
* ittiam
*
* @remarks
* none
*
*******************************************************************************
*/
#ifndef _ISVCE_INTRA_MODES_EVAL_H_
#define _ISVCE_INTRA_MODES_EVAL_H_
/**
******************************************************************************
*
* @brief
* derivation process for subblock/partition availability
*
* @par Description
* Calculates the availability of the left, top, topright and topleft subblock
* or partitions.
*
* @param[in] s_ngbr_avbl
* pointer to the neighbor availability information of the current macroblock
*
* @param[in] i1_pel_pos_x
* column position of the pel wrt the current block
*
* @param[in] i1_pel_pos_y
* row position of the pel wrt the current block
*
* @remarks Assumptions: before calling this function it is assumed that
* the neighbor availability of the current macroblock is already derived.
* Based on table 6-3 of H264 specification
*
* @return availability status (yes or no)
*
******************************************************************************
*/
UWORD8 isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *s_ngbr_avbl, WORD8 i1_pel_pos_x,
WORD8 i1_pel_pos_y);
/**
******************************************************************************
*
* @brief
* evaluate best intra 16x16 mode (rate distortion opt off)
*
* @par Description
* This function evaluates all the possible intra 16x16 modes and finds the mode
* that best represents the macro-block (least distortion) and occupies fewer
* bits in the bit-stream.
*
* @param[in] ps_proc_ctxt
* pointer to process context (handle)
*
* @remarks
* Ideally the cost of encoding a macroblock is calculated as
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
* input block and the reconstructed block and rate is the number of bits taken
* to place the macroblock in the bit-stream. In this routine the rate does not
* exactly point to the total number of bits it takes, rather it points to header
* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
* and residual bits fall in to texture bits the number of bits taken to encoding
* mbtype is considered as rate, we compute cost. Further we will approximate
* the distortion as the deviation b/w input and the predicted block as opposed
* to input and reconstructed block.
*
* NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
* the SAD and cost are one and the same.
*
* @return none
*
******************************************************************************
*/
void isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt);
/**
******************************************************************************
*
* @brief
* evaluate best intra 8x8 mode (rate distortion opt off)
*
* @par Description
* This function evaluates all the possible intra 8x8 modes and finds the mode
* that best represents the macro-block (least distortion) and occupies fewer
* bits in the bit-stream.
*
* @param[in] ps_proc_ctxt
* pointer to proc ctxt
*
* @remarks Ideally the cost of encoding a macroblock is calculated as
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
* input block and the reconstructed block and rate is the number of bits taken
* to place the macroblock in the bit-stream. In this routine the rate does not
* exactly point to the total number of bits it takes, rather it points to header
* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
* and residual bits fall in to texture bits the number of bits taken to encoding
* mbtype is considered as rate, we compute cost. Further we will approximate
* the distortion as the deviation b/w input and the predicted block as opposed
* to input and reconstructed block.
*
* NOTE: TODO: This function needs to be tested
*
* @return none
*
******************************************************************************
*/
void isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt);
/**
******************************************************************************
*
* @brief
* evaluate best intra 4x4 mode (rate distortion opt on)
*
* @par Description
* This function evaluates all the possible intra 4x4 modes and finds the mode
* that best represents the macro-block (least distortion) and occupies fewer
* bits in the bit-stream.
*
* @param[in] ps_proc_ctxt
* pointer to proc ctxt
*
* @remarks
* Ideally the cost of encoding a macroblock is calculated as
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
* input block and the reconstructed block and rate is the number of bits taken
* to place the macroblock in the bit-stream. In this routine the rate does not
* exactly point to the total number of bits it takes, rather it points to header
* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
* and residual bits fall in to texture bits the number of bits taken to encoding
* mbtype is considered as rate, we compute cost. Further we will approximate
* the distortion as the deviation b/w input and the predicted block as opposed
* to input and reconstructed block.
*
* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
* 24*lambda is added to the SAD before comparison with the best SAD for
* inter prediction. This is an empirical value to prevent using too many intra
* blocks.
*
* @return none
*
******************************************************************************
*/
void isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t *ps_proc_ctxt);
/**
******************************************************************************
*
* @brief
* evaluate best intra 4x4 mode (rate distortion opt off)
*
* @par Description
* This function evaluates all the possible intra 4x4 modes and finds the mode
* that best represents the macro-block (least distortion) and occupies fewer
* bits in the bit-stream.
*
* @param[in] ps_proc_ctxt
* pointer to proc ctxt
*
* @remarks
* Ideally the cost of encoding a macroblock is calculated as
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
* input block and the reconstructed block and rate is the number of bits taken
* to place the macroblock in the bit-stream. In this routine the rate does not
* exactly point to the total number of bits it takes, rather it points to header
* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
* and residual bits fall in to texture bits the number of bits taken to encoding
* mbtype is considered as rate, we compute cost. Further we will approximate
* the distortion as the deviation b/w input and the predicted block as opposed
* to input and reconstructed block.
*
* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
* 24*lambda is added to the SAD before comparison with the best SAD for
* inter prediction. This is an empirical value to prevent using too many intra
* blocks.
*
* @return none
*
******************************************************************************
*/
void isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt);
/**
******************************************************************************
*
* @brief
* evaluate best chroma intra 8x8 mode (rate distortion opt off)
*
* @par Description
* This function evaluates all the possible chroma intra 8x8 modes and finds
* the mode that best represents the macroblock (least distortion) and occupies
* fewer bits in the bitstream.
*
* @param[in] ps_proc_ctxt
* pointer to process context (handle)
*
* @remarks
* For chroma best intra pred mode is calculated based only on SAD
*
* @returns none
*
******************************************************************************
*/
void isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(
isvce_process_ctxt_t *ps_proc_ctxt);
/**
******************************************************************************
*
* @brief
* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
* prediction.
*
* @par Description
* This function evaluates first three 16x16 modes and compute corresponding sad
* and return the buffer predicted with best mode.
*
* @param[in] pu1_src
* UWORD8 pointer to the source
*
* @param[in] pu1_ngbr_pels_i16
* UWORD8 pointer to neighbouring pels
*
* @param[out] pu1_dst
* UWORD8 pointer to the destination
*
* @param[in] src_strd
* integer source stride
*
* @param[in] dst_strd
* integer destination stride
*
* @param[in] u4_n_avblty
* availability of neighbouring pixels
*
* @param[out] u4_intra_mode
* Pointer to the variable in which best mode is returned
*
* @param[out] pu4_sadmin
* Pointer to the variable in which minimum sad is returned
*
* @param[in] u4_valid_intra_modes
* Says what all modes are valid
*
* @returns none
*
******************************************************************************
*/
/*
* Function type shared by the 16x16 luma and the chroma intra mode evaluators
* declared below.
* NOTE: u4_n_avblty and pu4_sadmin are declared as WORD32 / WORD32 * (signed)
* although their prefixes suggest unsigned types.
*/
typedef void isvce_evaluate_intra_modes_ft(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16,
UWORD8 *pu1_dst, UWORD32 src_strd, UWORD32 dst_strd,
WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
WORD32 *pu4_sadmin, UWORD32 u4_valid_intra_modes);
/* unsuffixed variants (presumably the generic C implementations - confirm) */
isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes;
isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes;
/* assembly variants (_a9q and _av8 suffixes) */
isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_a9q;
isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_a9q;
isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_av8;
isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_av8;
/* x86 intrinsics variants (_ssse3 suffix) */
isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_ssse3;
isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_ssse3;
/**
******************************************************************************
*
* @brief
* Evaluate best intra 4x4 mode and perform prediction.
*
* @par Description
* This function evaluates 4x4 modes and compute corresponding sad
* and return the buffer predicted with best mode.
*
* @param[in] pu1_src
* UWORD8 pointer to the source
*
* @param[in] pu1_ngbr_pels
* UWORD8 pointer to neighbouring pels
*
* @param[out] pu1_dst
* UWORD8 pointer to the destination
*
* @param[in] src_strd
* integer source stride
*
* @param[in] dst_strd
* integer destination stride
*
* @param[in] u4_n_avblty
* availability of neighbouring pixels
*
* @param[out] u4_intra_mode
* Pointer to the variable in which best mode is returned
*
* @param[out] pu4_sadmin
* Pointer to the variable in which minimum cost is returned
*
* @param[in] u4_valid_intra_modes
* Says what all modes are valid
*
* @param[in] u4_lambda
* Lambda value for computing cost from SAD
*
* @param[in] u4_predictd_mode
* Predicted mode for cost computation
*
* @returns none
*
******************************************************************************
*/
/*
* Function type of the 4x4 intra mode evaluators declared below.
* NOTE: u4_n_avblty and pu4_sadmin are declared as WORD32 / WORD32 * (signed)
* although their prefixes suggest unsigned types.
*/
typedef void isvce_evaluate_intra_4x4_modes_ft(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels,
UWORD8 *pu1_dst, UWORD32 src_strd, UWORD32 dst_strd,
WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
WORD32 *pu4_sadmin, UWORD32 u4_valid_intra_modes,
UWORD32 u4_lambda, UWORD32 u4_predictd_mode);
/* unsuffixed variant (presumably the generic C implementation - confirm) */
isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes;
/* x86 intrinsics variant (_ssse3 suffix) */
isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_ssse3;
/* assembly variants (_a9q and _av8 suffixes) */
isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_a9q;
isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_av8;
#endif

480
encoder/svc/isvce_mc.c Normal file
View file

@ -0,0 +1,480 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_mc.c
*
* @brief
* Contains definition of functions for motion compensation
*
* @author
* ittiam
*
* @par List of Functions:
* - isvce_motion_comp_luma()
* - isvce_motion_comp_chroma()
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System include files */
#include <stdio.h>
/* User include files */
#include "ih264_typedefs.h"
#include "ih264_debug.h"
#include "isvc_defs.h"
#include "iv2.h"
#include "ive2.h"
#include "ime_distortion_metrics.h"
#include "ime_defs.h"
#include "ime_structs.h"
#include "isvc_structs.h"
#include "isvc_inter_pred_filters.h"
#include "isvc_mem_fns.h"
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "isvc_trans_quant_itrans_iquant.h"
#include "isvc_cabac_tables.h"
#include "isvce_defs.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "isvce_rate_control.h"
#include "isvce_cabac_structs.h"
#include "isvce_structs.h"
#include "isvce_mc.h"
#include "ih264e_half_pel.h"
#include "isvce_ibl_eval.h"
/*****************************************************************************/
/* Function Definitions */
/*****************************************************************************/
/**
******************************************************************************
*
* @brief
*  performs motion compensation for a luma mb for the given mv.
*
* @par Description
*  For a BASE_MODE mb that is flagged intra, the luma output buffer of the
*  intra pred context is returned through ps_pred and nothing else is done.
*  Otherwise, for every partition, the routine selects one of two sources:
*   - the full pel reference plane, moved to the position given by the full
*     pel part of the mv, or
*   - ps_proc->pu1_best_subpel_buf, when the mv has a half pel component or
*     the partition is bi-predicted.
*  When the mb has exactly one partition, no copy is made: pointer and stride
*  of the selected source are returned through ps_pred and used in place of
*  the pred buffer elsewhere. With more than one partition, the selected block
*  is copied in to ps_proc->pu1_pred_mb.
*
* @param[in] ps_proc
*  pointer to current proc ctxt
*
* @param[out] ps_pred
*  prediction buffer descriptor (data pointer and stride). Written in the
*  BASE_MODE intra case and in the single partition case; left untouched when
*  the mb has more than one partition.
*
* @return none
*
* @remarks Assumes half pel buffers for the entire frame are populated.
*  The quarter pel bits of the mv do not influence the source selection.
*
******************************************************************************
*/
void isvce_motion_comp_luma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred)
{
    /* codec context and the ISA specific inter pred kernels */
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;

    /* candidate sources: [0] full pel reference, [1] best sub pel / bi-pred buffer */
    UWORD8 *apu1_src[2];
    WORD32 ai4_src_strd[2];

    /* pred buffer stride */
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;

    /* partition idx */
    UWORD32 u4_prtn_idx;

    /* same sanity check as isvce_motion_comp_chroma */
    ASSERT(ps_proc->u4_num_sub_partitions <= ENC_MAX_PU_IN_MB);

    if((ps_proc->ps_mb_info->u2_mb_type == BASE_MODE) && ps_proc->ps_mb_info->u1_is_intra)
    {
        svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt;

        ps_pred->pv_data =
            (UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y]
                            .pv_data);
        ps_pred->i4_data_stride =
            ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y].i4_data_stride;

        return;
    }

    /* The full pel stride is always taken from reference list 0 */
    ai4_src_strd[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].i4_data_stride;

    /* Sub pel / bipred source does not depend on the partition */
    apu1_src[1] = ps_proc->pu1_best_subpel_buf;
    ai4_src_strd[1] = ps_proc->u4_bst_spel_buf_strd;

    for(u4_prtn_idx = 0; u4_prtn_idx < ps_proc->u4_num_sub_partitions; u4_prtn_idx++)
    {
        /* motion vector, size and position of the current partition */
        isvce_enc_pu_t *ps_curr_pu = ps_proc->ps_mb_info->as_pu + u4_prtn_idx;
        mv_t *ps_curr_mv;

        /* reference list supplying the mv and the full pel plane */
        WORD32 i4_ref_list = 0;
        WORD32 i4_bipred_flag = 0;

        /* full pel motion vectors */
        WORD32 i4_mv_x_full, i4_mv_y_full;

        /* index in to apu1_src / ai4_src_strd */
        UWORD32 u4_src_idx;

        switch(ps_curr_pu->u1_pred_mode)
        {
            case PRED_L1:
                i4_ref_list = 1;
                break;

            case PRED_BI:
                /*
                 * For PRED_BI only the bipred flag matters: it forces the
                 * selection of ps_proc->pu1_best_subpel_buf. The list 0 mv
                 * and plane picked up below are dummies.
                 */
                i4_bipred_flag = 1;
                break;

            case PRED_L0:
            default:
                break;
        }

        ps_curr_mv = &ps_curr_pu->as_me_info[i4_ref_list].s_mv;
        apu1_src[0] = ps_proc->as_ref_buf_props[i4_ref_list].as_component_bufs[0].pv_data;

        /* get full pel mv's (full pel units) */
        i4_mv_x_full = ps_curr_mv->i2_mvx >> 2;
        i4_mv_y_full = ps_curr_mv->i2_mvy >> 2;

        /* Move ref to position given by MV */
        apu1_src[0] += ((i4_mv_y_full * ai4_src_strd[0]) + i4_mv_x_full);

        /* non zero for half pel (bit 1 of either mv component) and bipred */
        u4_src_idx = (((ps_curr_mv->i2_mvx | ps_curr_mv->i2_mvy) & 0x2) != 0) || i4_bipred_flag;

        /********************************************************************/
        /* if the block is P16x16 MB and mv are not quarter pel motion      */
        /* vectors, there is no need to copy 16x16 unit from reference frame*/
        /* to pred buffer. We might as well send the reference frame buffer */
        /* pointer as pred buffer (ofc with updated stride) to fwd transform*/
        /* and inverse transform unit.                                      */
        /********************************************************************/
        if(ps_proc->u4_num_sub_partitions == 1)
        {
            ps_pred->pv_data = apu1_src[u4_src_idx];
            ps_pred->i4_data_stride = ai4_src_strd[u4_src_idx];
        }
        /*
         * Copying half pel or full pel to prediction buffer
         * Currently ps_proc->u4_num_sub_partitions will always be 1 as we only
         * support 16x16 in P mbs
         *
         * NOTE(review): ps_pred is not updated on this path; presumably the
         * caller has already pointed it at ps_proc->pu1_pred_mb - confirm.
         */
        else
        {
            /* width and height of partition */
            UWORD32 wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 2;
            UWORD32 ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 2;

            /* location of the partition inside the pred mb */
            UWORD8 *pu1_pred = ps_proc->pu1_pred_mb +
                               4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
                               4 * ps_curr_pu->u1_pos_x_in_4x4;

            ps_inter_pred_fxns->pf_inter_pred_luma_copy(apu1_src[u4_src_idx], pu1_pred,
                                                        ai4_src_strd[u4_src_idx], i4_pred_strd,
                                                        ht, wd, NULL, 0);
        }
    }
}
/**
******************************************************************************
*
* @brief
* performs motion compensation for chroma mb
*
* @par Description
* Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
* according to the motion vectors given
*
* @param[in] ps_proc
* pointer to current proc ctxt
*
* @return none
*
* @remarks Assumes half pel and quarter pel buffers for the entire frame are
* populated.
******************************************************************************
*/
void isvce_motion_comp_chroma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred)
{
/* codec context */
isvce_codec_t *ps_codec = ps_proc->ps_codec;
isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
/* Pointer to the structure having motion vectors, size and position of curr
* partitions */
isvce_enc_pu_t *ps_curr_pu;
/* pointers to full pel, half pel x, half pel y, half pel xy reference buffer
*/
UWORD8 *pu1_ref;
/* pred buffer ptr */
UWORD8 *pu1_pred;
/* strides of full pel reference buffer */
WORD32 i4_ref_strd;
/* pred buffer stride */
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
/* full pel motion vectors */
WORD32 u4_mv_x_full, u4_mv_y_full;
/* half pel motion vectors */
WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
/* quarter pel motion vectors */
WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
/* width & height of the partition */
UWORD32 wd, ht;
/* partition idx */
UWORD32 u4_num_prtn;
WORD32 u4_mv_x;
WORD32 u4_mv_y;
UWORD8 u1_dx, u1_dy;
ASSERT(ps_proc->u4_num_sub_partitions <= ENC_MAX_PU_IN_MB);
if((ps_proc->ps_mb_info->u2_mb_type == BASE_MODE) && ps_proc->ps_mb_info->u1_is_intra)
{
svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt;
ps_pred->pv_data =
(UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[UV]
.pv_data);
ps_pred->i4_data_stride =
ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[UV]
.i4_data_stride;
return;
}
else
{
ps_pred->pv_data = ps_proc->pu1_pred_mb;
ps_pred->i4_data_stride = ps_proc->i4_pred_strd;
}
for(u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
{
mv_t *ps_curr_mv;
ps_curr_pu = ps_proc->ps_mb_info->as_pu + u4_num_prtn;
if(ps_curr_pu->u1_pred_mode != BI)
{
ps_curr_mv = &ps_curr_pu->as_me_info[ps_curr_pu->u1_pred_mode].s_mv;
pu1_ref =
ps_proc->as_ref_buf_props[ps_curr_pu->u1_pred_mode].as_component_bufs[1].pv_data;
i4_ref_strd = ps_proc->as_ref_buf_props[ps_curr_pu->u1_pred_mode]
.as_component_bufs[1]
.i4_data_stride;
u4_mv_x = ps_curr_mv->i2_mvx >> 3;
u4_mv_y = ps_curr_mv->i2_mvy >> 3;
/* corresponds to full pel motion vector in luma, but in chroma
* corresponds to pel formed wiith dx, dy =4 */
u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
/* get half pel mv's */
u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
/* get quarter pel mv's */
u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
/* width and height of sub macro block */
wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 1;
ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 1;
/* move the pointers so that they point to the motion compensated
* locations */
pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
2 * ps_curr_pu->u1_pos_x_in_4x4;
u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
/* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with
* separate functions for better performance
*
* isvc_inter_pred_chroma_dx_zero_a9q
* and
* isvc_inter_pred_chroma_dy_zero_a9q
*/
ps_inter_pred_fxns->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, i4_pred_strd,
u1_dx, u1_dy, ht, wd);
}
else
{
/*
* We need to interpolate the L0 and L1 ref pics with the chorma MV
* then use them to average for bilinrar interpred
*/
WORD32 i4_predmode;
UWORD8 *pu1_ref_buf[2];
/* Temporary buffers to store the interpolated value from L0 and L1 */
pu1_ref_buf[L0] = ps_proc->apu1_subpel_buffs[0];
pu1_ref_buf[L1] = ps_proc->apu1_subpel_buffs[1];
for(i4_predmode = 0; i4_predmode < BI; i4_predmode++)
{
ps_curr_mv = &ps_curr_pu->as_me_info[i4_predmode].s_mv;
pu1_ref = ps_proc->as_ref_buf_props[i4_predmode].as_component_bufs[1].pv_data;
i4_ref_strd =
ps_proc->as_ref_buf_props[i4_predmode].as_component_bufs[1].i4_data_stride;
u4_mv_x = ps_curr_mv->i2_mvx >> 3;
u4_mv_y = ps_curr_mv->i2_mvy >> 3;
/*
* corresponds to full pel motion vector in luma, but in chroma
* corresponds to pel formed wiith dx, dy =4
*/
u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
/* get half pel mv's */
u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
/* get quarter pel mv's */
u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
/* width and height of sub macro block */
wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 1;
ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 1;
/* move the pointers so that they point to the motion compensated
* locations */
pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
2 * ps_curr_pu->u1_pos_x_in_4x4;
u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
ps_inter_pred_fxns->pf_inter_pred_chroma(
pu1_ref, pu1_ref_buf[i4_predmode], i4_ref_strd, MB_SIZE, u1_dx, u1_dy, ht, wd);
}
ps_inter_pred_fxns->pf_inter_pred_luma_bilinear(pu1_ref_buf[L0], pu1_ref_buf[L1],
pu1_pred, MB_SIZE, MB_SIZE,
i4_pred_strd, MB_SIZE >> 1, MB_SIZE);
}
}
}

87
encoder/svc/isvce_mc.h Normal file
View file

@ -0,0 +1,87 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_mc.h
*
* @brief
* This file contains declarations of routines that perform motion compensation
* of luma and chroma macroblocks.
*
* @author
* ittiam
*
* @remarks
* none
*
*******************************************************************************
*/
#ifndef _ISVCE_MC_H_
#define _ISVCE_MC_H_
/**
******************************************************************************
*
* @brief
* performs motion compensation for a luma mb for the given mv.
*
* @par Description
* This routine performs motion compensation of an inter mb. When the inter
* mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
* to pred buffer. In this case the function returns pointer and stride of the
* ref. buffer and this info is used in place of pred buffer else where.
* In other cases, the pred buffer is populated via copy / filtering + copy
* (q pel cases) and returned.
*
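* @param[in] ps_proc
* pointer to current proc ctxt
*
* @param[out] ps_pred
* descriptor (data pointer and stride) of the buffer holding the prediction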
*
* @return none
*
* @remarks Assumes half pel buffers for the entire frame are populated.
*
******************************************************************************
*/
extern void isvce_motion_comp_luma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred);
/**
******************************************************************************
*
* @brief
* performs motion compensation for chroma mb
*
* @par Description
* Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
* according to the motion vectors given
*
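* @param[in] ps_proc
* pointer to current proc ctxt
*
* @param[out] ps_pred
* descriptor (data pointer and stride) of the buffer holding the prediction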
*
* @return none
*
* @remarks Assumes half pel and quarter pel buffers for the entire frame are
* populated.
******************************************************************************
*/
extern void isvce_motion_comp_chroma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred);
#endif

2924
encoder/svc/isvce_me.c Normal file

File diff suppressed because it is too large Load diff

381
encoder/svc/isvce_me.h Normal file
View file

@ -0,0 +1,381 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_me.h
*
* @brief
* Contains macros and function declarations used for motion estimation
*
* @author
* ittiam
*
* @remarks
*
*******************************************************************************
*/
#ifndef _ISVCE_ME_H_
#define _ISVCE_ME_H_
#include "ih264_typedefs.h"
#include "isvce_structs.h"
/*****************************************************************************/
/* Constant Macros */
/*****************************************************************************/
/**
******************************************************************************
* @brief Skip Bias value for P slice
******************************************************************************
*/
#define SKIP_BIAS_P 0
/**
******************************************************************************
* @brief Skip Bias value for B slice
******************************************************************************
*/
#define SKIP_BIAS_B 0
/*****************************************************************************/
/* Function Macros */
/*****************************************************************************/
/**
******************************************************************************
 *  @brief      compute median of 3 elements (a, b, c) and store the output
 *  in to result. This is used for mv prediction
 *
 *  @remarks    The expansion is wrapped in do { } while(0) so that the macro
 *  behaves as a single statement (safe inside an unbraced if/else) and must
 *  be terminated with a semicolon at the call site. Every argument is
 *  parenthesised, but a, b and c are still evaluated more than once, so do
 *  not pass expressions with side effects.
******************************************************************************
 */
#define MEDIAN(a, b, c, result)     \
    do                              \
    {                               \
        if((a) > (b))               \
        {                           \
            if((b) > (c))           \
            {                       \
                (result) = (b);     \
            }                       \
            else if((a) > (c))      \
            {                       \
                (result) = (c);     \
            }                       \
            else                    \
            {                       \
                (result) = (a);     \
            }                       \
        }                           \
        else                        \
        {                           \
            if((c) > (b))           \
            {                       \
                (result) = (b);     \
            }                       \
            else if((c) > (a))      \
            {                       \
                (result) = (c);     \
            }                       \
            else                    \
            {                       \
                (result) = (a);     \
            }                       \
        }                           \
    } while(0)
/*****************************************************************************/
/* Extern Function Declarations */
/*****************************************************************************/
/**
*******************************************************************************
*
* @brief
* This function populates the length of the codewords for motion vectors in
* the range (-search range, search range) in pixels
*
* @param[in,out] ps_me
* Pointer to me ctxt. It is the only argument of the function, so the computed
* codeword lengths are presumably stored inside this context - TODO confirm
* against the definition
*
* @returns none
*
* @remarks The length of the code words are derived from signed exponential
* Golomb codes.
*
*******************************************************************************
*/
void isvce_init_mv_bits(isvce_me_ctxt_t *ps_me);
/**
*******************************************************************************
*
* @brief The function computes the parameters for a P skip MB
*
* @par Description:
* The function computes the parameters for a P skip MB
*
* @param[in] ps_proc
* Process context
*
* @param[in] u4_for_me
* Flag to indicate the purpose of computing skip
*
* @param[out] ps_pred_mv
* Flag to indicate the current active reference list
*
* @returns
* 1) Updates skip MV in proc
* 2) Returns if the current MB can be coded as skip or not
*
* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
* specification.
*
*******************************************************************************
*/
FT_FIND_SKIP_PARAMS isvce_find_pskip_params;
/**
*******************************************************************************
*
* @brief The function computes the parameters for a P skip MB
*
* @par Description:
* The function computes the parameters for a P skip MB
*
* @param[in] ps_proc
* Process context
*
* @param[in] u4_for_me
* Flag to indicate the purpose of computing skip
*
* @param[out] ps_pred_mv
* Flag to indicate the current active reference list
*
* @returns
* 1) Updates skip MV in proc
* 2) Returns if the current MB can be coded as skip or not
*
* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
* specification.
*
*******************************************************************************
*/
FT_FIND_SKIP_PARAMS isvce_find_pskip_params_me;
/**
*******************************************************************************
*
* @brief The function computes the parameters for a B skip MB
*
* @par Description:
* The function computes the parameters for a B skip MB
*
* @param[in] ps_proc
* Process context
*
* @param[in] u4_for_me
* Flag to indicate the purpose of computing skip
*
* @param[out] ps_pred_mv
* Flag to indicate the current active reference list
*
* @returns
* 1) Updates skip MV in proc
* 2) Returns if the current MB can be coded as skip or not
*
* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
* specification.
*
*******************************************************************************
*/
FT_FIND_SKIP_PARAMS isvce_find_bskip_params;
/**
*******************************************************************************
*
* @brief The function computes the parameters for a B skip MB
*
* @par Description:
* The function computes the parameters for a B skip MB
*
* @param[in] ps_proc
* Process context
*
* @param[in] u4_for_me
* Flag to indicate the purpose of computing skip
*
* @param[out] ps_pred_mv
* Flag to indicate the current active reference list
*
* @returns
* 1) Updates skip MV in proc
* 2) The type of SKIP [L0/L1/BI]
*
* @remarks
*******************************************************************************
*/
FT_FIND_SKIP_PARAMS isvce_find_bskip_params_me;
/**
*******************************************************************************
*
* @brief motion vector predictor
*
* @par Description:
* The routine calculates the motion vector predictor for a given block,
* given the candidate MV predictors.
*
* @param[out] ps_pred_mv
* pointer to the location that receives the MV predictor (x & y components)
* NOTE(review): direction inferred from the description - confirm against the
* definition
*
* @param[in] ps_neig_mv
* pointer to the candidate (neighbouring) MV predictors for the current block
* NOTE(review): number and order of candidates are not visible here - confirm
*
* @param[in] pred_algo
* selects the prediction algorithm; the accepted values are not visible in
* this header
*
* @returns none. The function is declared void, so the predictor is handed
* back through an argument rather than as a return value.
*
* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
* specification.
* Assumptions : 1. Assumes Only partition of size 16x16
*
*******************************************************************************
*/
void isvce_get_mv_predictor(isvce_enc_pu_mv_t *ps_pred_mv, isvce_enc_pu_mv_t *ps_neig_mv,
WORD32 pred_algo);
/**
*******************************************************************************
*
* @brief This function evaluates ME for 2 reference lists
*
* @par Description:
* It evaluates skip, full-pel and half-pel and assigns the correct MV in proc
*
* @param[in] ps_proc
* Process context corresponding to the job
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
FT_ME_ALGORITHM isvce_compute_me_multi_reflist;
/**
*******************************************************************************
*
* @brief This function evaluates ME for single reflist [Pred L0]
*
* @par Description:
* It evaluates skip, full-pel and half-pel and assigns the correct MV in proc
*
* @param[in] ps_proc
* Process context corresponding to the job
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
FT_ME_ALGORITHM isvce_compute_me_single_reflist;
/**
*******************************************************************************
*
* @brief This function initializes me ctxt
*
* @par Description:
* Before dispatching the current job to me thread, the me context associated
* with the job is initialized.
*
* @param[in] ps_proc
* Process context corresponding to the job
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_init_me(isvce_process_ctxt_t *ps_proc);
/**
*******************************************************************************
*
* @brief This function performs motion estimation for the current NMB
*
* @par Description:
* Initializes input and output pointers required by the function
* isvce_compute_me and calls the function isvce_compute_me in a loop to
* process NMBs.
*
* @param[in] ps_proc
* Process context corresponding to the job
*
* @param[in] u4_nmb_count
* NOTE(review): presumably the number of MBs to process in this call - confirm
* against the definition
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_compute_me_nmb(isvce_process_ctxt_t *ps_proc, UWORD32 u4_nmb_count);
/**
*******************************************************************************
*
* @brief This function performs MV prediction
*
* @par Description:
*
* @param[in] ps_proc
* Process context corresponding to the job
*
* @param[in] i4_reflist
* NOTE(review): presumably selects the reference list for which the MV is
* predicted - confirm against the definition
*
* @returns none
*
* @remarks
* This function will update the MB availability since intra inter decision
* should be done before the call
*
*******************************************************************************
*/
void isvce_mv_pred(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist);
/**
*******************************************************************************
*
* @brief This function approximates Pred. MV
*
* @par Description:
*
* @param[in] ps_proc
* Process context corresponding to the job
*
* @param[in] i4_ref_list
* NOTE(review): presumably selects the reference list for which the MV is
* approximated - confirm against the definition
*
* @returns none
*
* @remarks
* Motion estimation happens at nmb level. For cost calculations, mv is
* approximated using this function
*
*******************************************************************************
*/
void isvce_mv_pred_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_ref_list);
#endif

View file

@ -0,0 +1,191 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_mode_stat_visualiser.c
*
* @brief
* Contains functions used for synthesising analysis YUV
*
*******************************************************************************
*/
#include "isvce_defs.h"
#if ENABLE_MODE_STAT_VISUALISER
#include "ih264_typedefs.h"
#include "isvc_macros.h"
#include "ih264_debug.h"
#include "isvc_defs.h"
#include "isvc_structs.h"
#include "isvce_structs.h"
#include "isvce_structs.h"
#include "ih264e_fmt_conv.h"
#include "isvce_mode_stat_visualiser.h"
/* Number of overlay colours; sizes the first dimension of gau1_colors */
#define MAX_NUM_MB_MODE_VISUALISATIONS 1
/* Path passed to fopen() by isvce_msv_ctxt_init; relative, so it resolves */
/* against the current working directory of the process */
static const UWORD8 gau1_output_file_path[] = "out.yuv";
/* Weight given to the overlay colour when isvce_msv_set_mode blends it with */
/* the existing sample; the existing sample is weighted by (1 - gd_alpha) */
static const double gd_alpha = 0.5;
/* Overlay colours: one row per visualisation, one entry per colour component. */
/* isvce_msv_set_mode indexes the second dimension with its component loop */
/* counter */
static const UWORD8 gau1_colors[MAX_NUM_MB_MODE_VISUALISATIONS][NUM_COMPONENTS] = {
/* Red */
{81, 90, 240},
};
/**
 * Returns the number of bytes needed by the mode stat visualiser: the context
 * struct itself followed by one frame made of u4_wd * u4_ht luma samples and
 * two chroma planes of a quarter of that size each, one byte per sample.
 *
 * @param[in] u4_wd  frame width in samples
 * @param[in] u4_ht  frame height in samples
 *
 * @returns total size in bytes
 */
UWORD32 isvce_get_msv_ctxt_size(UWORD32 u4_wd, UWORD32 u4_ht)
{
    const WORD32 i4_luma_samples = u4_wd * u4_ht;
    const WORD32 i4_chroma_samples = i4_luma_samples / 4;
    const WORD32 i4_frame_samples = i4_luma_samples + i4_chroma_samples * 2;

    return sizeof(mode_stat_visualiser_t) + i4_frame_samples * sizeof(UWORD8);
}
/**
 * Carves the mode stat visualiser context and its frame buffer out of the
 * memory record and opens the output file.
 *
 * The context struct is placed at ps_mem_rec->pv_base and is followed by the
 * component planes, luma first. The layout mirrors the size computed by
 * isvce_get_msv_ctxt_size for the configured width and height.
 *
 * @param[in,out] ps_codec    codec context; supplies s_cfg.u4_wd / s_cfg.u4_ht
 *                            and receives the ps_mode_stat_visualiser pointer
 * @param[in]     ps_mem_rec  memory record whose pv_base holds the allocation
 *
 * @remarks ps_output_file is left NULL when the file cannot be opened; the
 *          function returns void and cannot report the failure.
 */
void isvce_msv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec)
{
    mode_stat_visualiser_t *ps_mode_stat_visualiser;
    yuv_buf_props_t *ps_frame_buf;

    WORD32 i;

    UWORD32 u4_wd = ps_codec->s_cfg.u4_wd;
    UWORD32 u4_ht = ps_codec->s_cfg.u4_ht;
    WORD32 i4_num_luma_samples = u4_wd * u4_ht;
    WORD32 i4_num_chroma_samples = i4_num_luma_samples / 4;
    UWORD8 *pu1_buf = ps_mem_rec->pv_base;
    /* Tracks how much of the computed allocation is still unassigned */
    WORD64 i8_alloc_mem_size = isvce_get_msv_ctxt_size(u4_wd, u4_ht);

    ps_mode_stat_visualiser = ps_codec->ps_mode_stat_visualiser =
        (mode_stat_visualiser_t *) pu1_buf;
    pu1_buf += sizeof(ps_mode_stat_visualiser[0]);
    i8_alloc_mem_size -= sizeof(ps_mode_stat_visualiser[0]);

    ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf;

    /* The file receives raw samples through fwrite(); binary mode prevents */
    /* newline translation on platforms that distinguish text streams */
    ps_mode_stat_visualiser->ps_output_file = fopen((const char *) gau1_output_file_path, "wb");

    ps_frame_buf->e_color_format = IV_YUV_420P;
    ps_frame_buf->u1_bit_depth = 8;
    ps_frame_buf->u4_width = u4_wd;
    ps_frame_buf->u4_height = u4_ht;

    for(i = 0; i < NUM_COMPONENTS; i++)
    {
        UWORD8 u1_is_chroma = (((COMPONENT_TYPE) i) != Y);
        UWORD32 u4_buf_size = u1_is_chroma ? i4_num_chroma_samples : i4_num_luma_samples;
        /* Planes are stored tightly packed: chroma stride is half the width */
        UWORD32 u4_stride = u4_wd >> u1_is_chroma;

        ps_frame_buf->as_component_bufs[i].pv_data = pu1_buf;
        ps_frame_buf->as_component_bufs[i].i4_data_stride = u4_stride;

        pu1_buf += u4_buf_size;
        i8_alloc_mem_size -= u4_buf_size;
    }

    ASSERT(i8_alloc_mem_size >= 0);
}
/**
 * Releases the resources owned by the mode stat visualiser context, i.e. the
 * output file opened by isvce_msv_ctxt_init.
 *
 * @param[in,out] ps_mode_stat_visualiser  visualiser context
 *
 * @remarks The fopen() result in isvce_msv_ctxt_init is not checked, so the
 *          handle may be NULL here; fclose(NULL) is undefined behaviour and is
 *          therefore skipped. The handle is cleared after closing so that a
 *          repeated call is harmless.
 */
void isvce_msv_ctxt_delete(mode_stat_visualiser_t *ps_mode_stat_visualiser)
{
    if(NULL != ps_mode_stat_visualiser->ps_output_file)
    {
        fclose(ps_mode_stat_visualiser->ps_output_file);
        ps_mode_stat_visualiser->ps_output_file = NULL;
    }
}
/**
 * Copies the input picture of the last spatial layer (index
 * u1_num_spatial_layers - 1) into the visualiser's frame buffer, converting it
 * from semi-planar to planar layout on the way.
 *
 * @param[in,out] ps_mode_stat_visualiser  visualiser context; its s_frame_buf
 *                                         planes are overwritten
 * @param[in]     ps_inp_buf               input buffer holding the per-layer
 *                                         YUV buffers and the SVC parameters
 *
 * @remarks The asserts document the expected formats: source layer in
 *          IV_YUV_420SP_UV, destination frame in IV_YUV_420P, with matching
 *          dimensions and bit depth.
 */
void isvce_msv_get_input_frame(mode_stat_visualiser_t *ps_mode_stat_visualiser,
                               isvce_inp_buf_t *ps_inp_buf)
{
    svc_params_t *ps_svc_params = &ps_inp_buf->s_svc_params;
    yuv_buf_props_t *ps_target_layer_yuv_buf =
        &ps_inp_buf->as_layer_yuv_buf_props[ps_svc_params->u1_num_spatial_layers - 1];
    yuv_buf_props_t *ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf;

    ASSERT(ps_target_layer_yuv_buf->u4_width == ps_frame_buf->u4_width);
    ASSERT(ps_target_layer_yuv_buf->u4_height == ps_frame_buf->u4_height);
    ASSERT(ps_target_layer_yuv_buf->u1_bit_depth == ps_frame_buf->u1_bit_depth);
    ASSERT(ps_target_layer_yuv_buf->e_color_format == IV_YUV_420SP_UV);
    /* The destination colour format, not its bit depth, must be planar 4:2:0 */
    ASSERT(ps_frame_buf->e_color_format == IV_YUV_420P);
    ASSERT(ps_target_layer_yuv_buf->as_component_bufs[U].i4_data_stride ==
           ps_target_layer_yuv_buf->as_component_bufs[V].i4_data_stride);

    isvce_fmt_conv_420sp_to_420p(
        ps_target_layer_yuv_buf->as_component_bufs[Y].pv_data,
        ps_target_layer_yuv_buf->as_component_bufs[UV].pv_data,
        ps_frame_buf->as_component_bufs[Y].pv_data, ps_frame_buf->as_component_bufs[U].pv_data,
        ps_frame_buf->as_component_bufs[V].pv_data, ps_frame_buf->u4_width, ps_frame_buf->u4_height,
        ps_target_layer_yuv_buf->as_component_bufs[Y].i4_data_stride,
        ps_target_layer_yuv_buf->as_component_bufs[UV].i4_data_stride,
        ps_frame_buf->as_component_bufs[Y].i4_data_stride,
        ps_frame_buf->as_component_bufs[U].i4_data_stride, 1, 0);
}
/**
 * Tints the macroblock at ps_mb_pos in the visualiser frame when the MB uses
 * residual prediction.
 *
 * Every sample of the MB, in every component, is replaced by a blend of the
 * first overlay colour (weight gd_alpha) and the existing sample (weight
 * 1 - gd_alpha), rounded to nearest. MBs whose u1_residual_prediction_flag is
 * zero are left untouched.
 *
 * @param[in,out] ps_mode_stat_visualiser  visualiser context owning the frame
 * @param[in]     ps_mb_info               MB info; only
 *                                         u1_residual_prediction_flag is read
 * @param[in]     ps_mb_pos                MB position in MB units (abscissa is
 *                                         the column, ordinate the row)
 */
void isvce_msv_set_mode(mode_stat_visualiser_t *ps_mode_stat_visualiser,
                        isvce_mb_info_t *ps_mb_info, coordinates_t *ps_mb_pos)
{
    UWORD32 i, j, k;

    /* The flag is constant for the whole MB; test it once rather than per sample */
    if(!ps_mb_info->u1_residual_prediction_flag)
    {
        return;
    }

    for(i = 0; i < NUM_COMPONENTS; i++)
    {
        UWORD8 u1_is_chroma = (((COMPONENT_TYPE) i) != Y);
        /* Chroma blocks are half the MB size in each direction */
        UWORD32 u4_wd = MB_SIZE >> u1_is_chroma;
        UWORD32 u4_ht = MB_SIZE >> u1_is_chroma;
        UWORD8 *pu1_buf = ps_mode_stat_visualiser->s_frame_buf.as_component_bufs[i].pv_data;
        WORD32 i4_stride = ps_mode_stat_visualiser->s_frame_buf.as_component_bufs[i].i4_data_stride;

        /* Move to the top-left sample of this MB in the current plane */
        pu1_buf += ps_mb_pos->i4_abscissa * u4_wd + ps_mb_pos->i4_ordinate * u4_ht * i4_stride;

        for(j = 0; j < u4_ht; j++)
        {
            for(k = 0; k < u4_wd; k++)
            {
                /* +0.5 rounds to nearest before the truncating cast */
                pu1_buf[k + j * i4_stride] =
                    (UWORD8) (gd_alpha * gau1_colors[0][i] +
                              (1. - gd_alpha) * pu1_buf[k + j * i4_stride] + 0.5);
            }
        }
    }
}
/**
 * Appends the visualiser frame to the output file, one component plane after
 * another, each written as width * height contiguous bytes.
 *
 * @param[in] ps_mode_stat_visualiser  visualiser context owning the frame and
 *                                     the output file
 *
 * @remarks Nothing is written when the output file could not be opened
 *          (handle is NULL). Writing stops at the first short write, since the
 *          function returns void and cannot report the failure.
 */
void isvce_msv_dump_visualisation(mode_stat_visualiser_t *ps_mode_stat_visualiser)
{
    WORD32 i;

    FILE *ps_output_file = ps_mode_stat_visualiser->ps_output_file;
    yuv_buf_props_t *ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf;

    /* fopen() may have failed at init time; fwrite() on NULL is undefined */
    if(NULL == ps_output_file)
    {
        return;
    }

    for(i = 0; i < NUM_COMPONENTS; i++)
    {
        UWORD8 u1_is_chroma = (((COMPONENT_TYPE) i) != Y);
        UWORD32 u4_wd = ps_frame_buf->u4_width >> u1_is_chroma;
        UWORD32 u4_ht = ps_frame_buf->u4_height >> u1_is_chroma;
        UWORD32 u4_size = u4_wd * u4_ht;

        /* A single fwrite per plane is only valid for tightly packed planes */
        ASSERT(u4_wd == ps_frame_buf->as_component_bufs[i].i4_data_stride);

        if(fwrite(ps_frame_buf->as_component_bufs[i].pv_data, sizeof(UWORD8), u4_size,
                  ps_output_file) != u4_size)
        {
            break;
        }
    }
}
#endif

View file

@ -0,0 +1,72 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_mode_stat_visualiser.h
*
* @brief
* Contains declarations for the functions defined in
* isvce_mode_stat_visualiser.c
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_MODE_STAT_VISUALISER_H_
#define _ISVCE_MODE_STAT_VISUALISER_H_
#if ENABLE_MODE_STAT_VISUALISER
#include <stdio.h>
#include "ih264_typedefs.h"
#include "isvc_structs.h"
#include "isvce_structs.h"
/* State of the mode stat visualiser: the frame being annotated and the file */
/* the annotated frames are written to */
typedef struct mode_stat_visualiser_t
{
/* Output stream opened by isvce_msv_ctxt_init and closed by */
/* isvce_msv_ctxt_delete */
FILE *ps_output_file;
/* Frame buffer that isvce_msv_get_input_frame fills, isvce_msv_set_mode */
/* tints and isvce_msv_dump_visualisation writes out */
yuv_buf_props_t s_frame_buf;
} mode_stat_visualiser_t;
/* Bytes needed for the visualiser context plus one frame of u4_wd x u4_ht */
extern UWORD32 isvce_get_msv_ctxt_size(UWORD32 u4_wd, UWORD32 u4_ht);
/* Lays the context out in ps_mem_rec->pv_base, stores it in ps_codec and */
/* opens the output file */
extern void isvce_msv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
/* Closes the output file */
extern void isvce_msv_ctxt_delete(mode_stat_visualiser_t *ps_mode_stat_visualiser);
/* Converts the last spatial layer of ps_inp_buf into the visualiser frame */
extern void isvce_msv_get_input_frame(mode_stat_visualiser_t *ps_mode_stat_visualiser,
isvce_inp_buf_t *ps_inp_buf);
/* Writes the visualiser frame, plane by plane, to the output file */
extern void isvce_msv_dump_visualisation(mode_stat_visualiser_t *ps_mode_stat_visualiser);
/* Tints the MB at ps_mb_pos when its u1_residual_prediction_flag is set */
extern void isvce_msv_set_mode(mode_stat_visualiser_t *ps_mode_stat_visualiser,
isvce_mb_info_t *ps_mb_info, coordinates_t *ps_mb_pos);
#endif
#endif

View file

@ -0,0 +1,124 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_nalu_stat_aggregator.c
*
* @brief
* Contains objects used for aggregating nalu statistics
*
*******************************************************************************
*/
#include <stdio.h>
#include <string.h>
#include "ih264_typedefs.h"
#include "iv2.h"
#include "isvce_structs.h"
#include "isvce_nalu_stat_aggregator.h"
/**
 * Resets the NALU count of every spatial layer's descriptor to zero.
 *
 * @param[in,out] ps_nalu_descriptor     array with one descriptor per layer
 * @param[in]     u1_num_spatial_layers  number of entries to reset
 */
void isvce_nalu_info_au_init(nalu_descriptors_t *ps_nalu_descriptor, UWORD8 u1_num_spatial_layers)
{
    UWORD8 u1_layers_left = u1_num_spatial_layers;

    /* Walk the descriptors from the last layer down to the first */
    while(u1_layers_left > 0)
    {
        u1_layers_left--;
        ps_nalu_descriptor[u1_layers_left].u1_num_nalus = 0;
    }
}
/**
 * Appends one CSV line per recorded NALU to the output text buffer.
 *
 * Each line has seven comma separated columns. The first two are the NALU type
 * and its size in bytes (i8_num_bits / 8). For VCL NALUs the remaining columns
 * are spatial layer id, temporal layer id, IDR flag, 1, 1; for all other NALUs
 * they are five -1 placeholders.
 *
 * @param[in]     ps_nalu_descriptor  descriptor holding u1_num_nalus records
 * @param[in,out] ps_csv_buf          destination; text is written at offset
 *                                    u4_num_bytes, which is then advanced by
 *                                    the number of characters stored
 *
 * @remarks The text is always NUL terminated and never exceeds u4_buf_size;
 *          records that do not fit are truncated. Translation stops as soon as
 *          no space is left or a formatting error occurs.
 */
void isvce_nalu_info_csv_translator(nalu_descriptors_t *ps_nalu_descriptor,
                                    isvce_nalu_info_buf_t *ps_csv_buf)
{
    char ac_csv_string[MAX_BYTES_PER_NALU_INFO];

    WORD32 i;

    for(i = 0; i < ps_nalu_descriptor->u1_num_nalus; i++)
    {
        nalu_info_t *ps_nalu_info = &ps_nalu_descriptor->as_nalu_info[i];
        WORD32 i4_num_bytes_written;
        /* Widen before subtracting: both operands are unsigned 32 bit, so the */
        /* difference would otherwise wrap instead of going negative */
        WORD64 i8_num_bytes_available =
            ((WORD64) ps_csv_buf->u4_buf_size) - ((WORD64) ps_csv_buf->u4_num_bytes);

        /* snprintf() stores nothing, not even '\0', when given no space */
        if(i8_num_bytes_available <= 0)
        {
            break;
        }

        if(ps_nalu_info->b_is_vcl_nal)
        {
            snprintf(ac_csv_string, MAX_BYTES_PER_NALU_INFO, "%d,%u,%d,%d,%d,%d,%d\n",
                     ps_nalu_info->e_nalu_type, (UWORD32) (ps_nalu_info->i8_num_bits / 8),
                     ps_nalu_info->u1_spatial_layer_id, ps_nalu_info->u1_temporal_layer_id,
                     ps_nalu_info->b_is_idr, 1, 1);
        }
        else
        {
            snprintf(ac_csv_string, MAX_BYTES_PER_NALU_INFO, "%d,%u,%d,%d,%d,%d,%d\n",
                     ps_nalu_info->e_nalu_type, (UWORD32) (ps_nalu_info->i8_num_bits / 8), -1, -1,
                     -1, -1, -1);
        }

        i4_num_bytes_written =
            snprintf((char *) (ps_csv_buf->pu1_buf + ps_csv_buf->u4_num_bytes),
                     (size_t) i8_num_bytes_available, "%s", ac_csv_string);

        if(i4_num_bytes_written < 0)
        {
            break;
        }

        /* On truncation snprintf() reports the length it wanted to write; */
        /* only (available - 1) characters plus '\0' were actually stored */
        if(i4_num_bytes_written >= i8_num_bytes_available)
        {
            i4_num_bytes_written = (WORD32) (i8_num_bytes_available - 1);
        }

        ps_csv_buf->u4_num_bytes += (UWORD32) i4_num_bytes_written;
    }
}
/**
 * Returns the first unused NALU record of the descriptor, i.e. the entry at
 * index u1_num_nalus.
 *
 * @param[in] ps_nalu_descriptor  descriptor to take the record from
 *
 * @returns pointer into as_nalu_info; the count itself is not changed here
 *
 * @remarks The index is not checked against the capacity of as_nalu_info.
 */
nalu_info_t *isvce_get_next_nalu_info_buf(nalu_descriptors_t *ps_nalu_descriptor)
{
    const UWORD8 u1_next_idx = ps_nalu_descriptor->u1_num_nalus;

    return ps_nalu_descriptor->as_nalu_info + u1_next_idx;
}
/**
 * Fills in a NALU record.
 *
 * Type, initial bit count and IDR flag are always stored. The NALU is marked
 * as VCL when its type is NAL_SLICE_NON_IDR, NAL_SLICE_IDR or
 * NAL_CODED_SLICE_EXTENSION; only then are the layer ids and the slice count
 * stored. For any other type those three fields are left untouched.
 *
 * @param[out] ps_nalu_info          record to initialise
 * @param[in]  i8_init_bits          initial value of the bit counter
 * @param[in]  e_nalu_type           NAL unit type
 * @param[in]  u1_spatial_layer_id   spatial layer id (VCL only)
 * @param[in]  u1_temporal_layer_id  temporal layer id (VCL only)
 * @param[in]  u1_num_slices         slice count (VCL only)
 * @param[in]  b_is_idr              IDR flag
 */
void isvce_nalu_info_buf_init(nalu_info_t *ps_nalu_info, WORD64 i8_init_bits,
                              NAL_UNIT_TYPE_T e_nalu_type, UWORD8 u1_spatial_layer_id,
                              UWORD8 u1_temporal_layer_id, UWORD8 u1_num_slices, bool b_is_idr)
{
    const bool b_is_vcl = (NAL_SLICE_NON_IDR == e_nalu_type) || (NAL_SLICE_IDR == e_nalu_type) ||
                          (NAL_CODED_SLICE_EXTENSION == e_nalu_type);

    ps_nalu_info->b_is_vcl_nal = b_is_vcl;
    ps_nalu_info->b_is_idr = b_is_idr;
    ps_nalu_info->i8_num_bits = i8_init_bits;
    ps_nalu_info->e_nalu_type = e_nalu_type;

    if(b_is_vcl)
    {
        ps_nalu_info->u1_num_slices = u1_num_slices;
        ps_nalu_info->u1_temporal_layer_id = u1_temporal_layer_id;
        ps_nalu_info->u1_spatial_layer_id = u1_spatial_layer_id;
    }
}
/**
 * Marks one more NALU record of the descriptor as used by incrementing
 * u1_num_nalus.
 *
 * @param[in,out] ps_nalu_descriptor  descriptor whose count is advanced
 */
void isvce_update_nalu_count(nalu_descriptors_t *ps_nalu_descriptor)
{
    ps_nalu_descriptor->u1_num_nalus = (UWORD8) (ps_nalu_descriptor->u1_num_nalus + 1);
}

View file

@ -0,0 +1,99 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_nalu_stat_aggregator.h
*
* @brief
* Contains objects used for aggregating nalu statistics
*
*******************************************************************************
*/
#ifndef _ISVCE_NALU_STAT_AGGREGATOR_H_
#define _ISVCE_NALU_STAT_AGGREGATOR_H_
#include <stdbool.h>
#include "ih264_typedefs.h"
#include "isvce.h"
#include "isvc_defs.h"
#include "isvce_defs.h"
/* Macros */
/* Size in bytes of the scratch string that holds one formatted CSV record */
/* (ac_csv_string in isvce_nalu_info_csv_translator); +1 for '\0' */
#define MAX_BYTES_PER_NALU_INFO (45 + 1)
/* Capacity of nalu_descriptors_t::as_nalu_info. Budget: */
/* SPS + (MAX_NUM_SPATIAL_LAYERS - 1) * SUBSET_SPS + */
/* MAX_NUM_SPATIAL_LAYERS * PPS + */
/* 1 PREFIX_NALU + 1 SLICE_[NON|]IDR + */
/* (MAX_NUM_SPATIAL_LAYERS - 1) * CODED_SLICE_EXTENSION */
/* NOTE(review): the sum equals 10 only for MAX_NUM_SPATIAL_LAYERS == 3 - */
/* confirm the two stay in sync */
#define MAX_NALU_PER_LAYER 10
/* Structs */
/* Structs */
/* Statistics record for a single NAL unit, filled by */
/* isvce_nalu_info_buf_init and printed by isvce_nalu_info_csv_translator */
typedef struct nalu_info_t
{
/* NAL unit type */
NAL_UNIT_TYPE_T e_nalu_type;
/* Size counter in bits; the CSV output reports i8_num_bits / 8 */
WORD64 i8_num_bits;
/* true for NAL_SLICE_NON_IDR, NAL_SLICE_IDR and NAL_CODED_SLICE_EXTENSION */
bool b_is_vcl_nal;
/* IDR flag as passed to isvce_nalu_info_buf_init */
bool b_is_idr;
/* The three fields below are only written when b_is_vcl_nal is true */
UWORD8 u1_spatial_layer_id;
UWORD8 u1_temporal_layer_id;
UWORD8 u1_num_slices;
} nalu_info_t;
/* Fixed capacity list of NALU records. isvce_nalu_info_au_init treats its */
/* argument as an array with one of these per spatial layer */
typedef struct nalu_descriptors_t
{
/* Record storage; entries [0, u1_num_nalus) are in use */
nalu_info_t as_nalu_info[MAX_NALU_PER_LAYER];
/* Number of records in use; reset by isvce_nalu_info_au_init and advanced */
/* by isvce_update_nalu_count */
UWORD8 u1_num_nalus;
} nalu_descriptors_t;
/* Function declarations */
/**
 * Returns the size in bytes of a text buffer large enough to hold
 * MAX_NALU_PER_LAYER records of MAX_BYTES_PER_NALU_INFO bytes for each of the
 * given number of spatial layers.
 *
 * @param[in] u1_num_spatial_layers  number of spatial layers
 *
 * @returns buffer size in bytes
 */
static FORCEINLINE UWORD32 isvce_get_nalu_info_buf_size(UWORD8 u1_num_spatial_layers)
{
    const UWORD32 u4_bytes_per_layer = MAX_NALU_PER_LAYER * MAX_BYTES_PER_NALU_INFO;

    return u4_bytes_per_layer * u1_num_spatial_layers;
}
/* Resets u1_num_nalus of the first u1_num_spatial_layers descriptors */
extern void isvce_nalu_info_au_init(nalu_descriptors_t *ps_nalu_descriptor,
UWORD8 u1_num_spatial_layers);
/* Appends one CSV line per record of the descriptor to ps_csv_buf */
extern void isvce_nalu_info_csv_translator(nalu_descriptors_t *ps_nalu_descriptor,
isvce_nalu_info_buf_t *ps_csv_buf);
/* Returns the record at index u1_num_nalus without advancing the count */
extern nalu_info_t *isvce_get_next_nalu_info_buf(nalu_descriptors_t *ps_nalu_descriptor);
/* Initialises a NALU record. i8_init_bits seeds the record's bit counter */
/* (nalu_info_t::i8_num_bits); the name matches the definition in */
/* isvce_nalu_stat_aggregator.c, which stores the value as bits, not bytes. */
/* The layer ids and slice count are stored for VCL NALUs only. */
extern void isvce_nalu_info_buf_init(nalu_info_t *ps_nalu_info, WORD64 i8_init_bits,
                                     NAL_UNIT_TYPE_T e_nalu_type, UWORD8 u1_spatial_layer_id,
                                     UWORD8 u1_temporal_layer_id, UWORD8 u1_num_slices,
                                     bool b_is_idr);
extern void isvce_update_nalu_count(nalu_descriptors_t *ps_nalu_descriptor);
#endif

View file

@ -0,0 +1,156 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_pred_structs.h
*
* @brief
* Contains struct definition used for prediction
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_PRED_STRUCTS_H_
#define _ISVCE_PRED_STRUCTS_H_
#include "ih264_typedefs.h"
#include "isvc_defs.h"
#include "isvc_structs.h"
#include "isvce_defs.h"
/**
* PU information: motion data for a single prediction direction, i.e. one
* motion vector together with its reference index. isvce_enc_pu_t holds one of
* these per direction.
*/
typedef struct
{
/**
* Motion Vector
*/
mv_t s_mv;
/**
* Ref index
*/
WORD8 i1_ref_idx;
} isvce_enc_pu_mv_t;
/*
* Total Pu info for an MB
* NOTE(review): isvce_mb_info_t stores an array of these (as_pu), so one
* instance describes a single PU of the MB rather than the whole MB.
*/
typedef struct isvce_enc_pu_t
{
/* Array with ME info for all lists */
isvce_enc_pu_mv_t as_me_info[NUM_PRED_DIRS];
/* One index per prediction direction */
/* NOTE(review): presumably selects the MV predictor - confirm against users */
UWORD8 au1_mvp_idx[NUM_PRED_DIRS];
/**
* PU X position in terms of min PU (4x4) units
*/
UWORD8 u1_pos_x_in_4x4;
/**
* PU Y position in terms of min PU (4x4) units
*/
UWORD8 u1_pos_y_in_4x4;
/**
* PU width in pixels = (u1_wd_in_4x4_m1 + 1) << 2
*/
UWORD8 u1_wd_in_4x4_m1;
/**
* PU height in pixels = (u1_ht_in_4x4_m1 + 1) << 2
*/
UWORD8 u1_ht_in_4x4_m1;
/**
* PRED_L0, PRED_L1, PRED_BI
*/
UWORD8 u1_pred_mode;
} isvce_enc_pu_t;
/* Intra mode data of one block; enc_intra_pu_t keeps one per 4x4 block */
typedef struct intra4x4_mode_data_t
{
/* NOTE(review): presumably the mode predicted from neighbours - confirm */
UWORD8 u1_predicted_mode;
/* NOTE(review): presumably the mode actually chosen for the block - confirm */
UWORD8 u1_mode;
} intra4x4_mode_data_t;
/* 8x8 blocks carry the same pair of fields as 4x4 blocks */
typedef intra4x4_mode_data_t intra8x8_mode_data_t;
/* Intra mode data for a 16x16 block; unlike the 4x4/8x8 variant it carries */
/* no predicted mode field */
typedef struct intra16x16_mode_data_t
{
UWORD8 u1_mode;
} intra16x16_mode_data_t;
/* Intra prediction data of one MB, with separate storage for each of the */
/* 4x4, 8x8 and 16x16 partitionings plus the chroma mode */
typedef struct enc_intra_pu_t
{
/* One entry per 4x4 block (MAX_TU_IN_MB entries) */
intra4x4_mode_data_t as_i4x4_mode_data[MAX_TU_IN_MB];
/* One entry per 8x8 block (MIN_TU_IN_MB entries) */
intra8x8_mode_data_t as_i8x8_mode_data[MIN_TU_IN_MB];
/* Single entry for the 16x16 case */
intra16x16_mode_data_t s_i16x16_mode_data;
/* Intra mode of the chroma components */
UWORD8 u1_chroma_intra_mode;
} enc_intra_pu_t;
/* Per macroblock coding information: inter PU data, intra PU data and */
/* MB level flags and measurements */
typedef struct isvce_mb_info_t
{
/* Inter prediction data, one entry per PU */
isvce_enc_pu_t as_pu[ENC_MAX_PU_IN_MB];
/* Intra prediction data */
enc_intra_pu_t s_intra_pu;
/* NOTE(review): presumably coded block pattern - confirm */
UWORD32 u4_cbp;
/* NOTE(review): presumably coded sub block pattern - confirm */
UWORD32 u4_csbp;
/* NOTE(review): presumably the sub block pattern of the residual - confirm */
UWORD32 u4_res_csbp;
UWORD16 u2_mb_type;
WORD32 i4_mb_distortion;
UWORD8 u1_base_mode_flag;
/* Read by isvce_msv_set_mode to decide whether the MB gets highlighted */
UWORD8 u1_residual_prediction_flag;
UWORD8 u1_tx_size;
UWORD8 u1_mb_qp;
UWORD8 u1_is_intra;
} isvce_mb_info_t;
#endif

2794
encoder/svc/isvce_process.c Normal file

File diff suppressed because it is too large Load diff

285
encoder/svc/isvce_process.h Normal file
View file

@ -0,0 +1,285 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_process.h
*
* @brief
* Contains functions for codec thread
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_PROCESS_H_
#define _ISVCE_PROCESS_H_
/*****************************************************************************/
/* Function Declarations */
/*****************************************************************************/
/**
******************************************************************************
*
* @brief This function generates the sps, pps set on request
*
* @par Description
* When the encoder is set in header generation mode, the following function
* is called. It generates the sps and pps headers and returns control back
* to the caller.
*
* @param[in] ps_codec
* pointer to codec context
*
* @param ps_inp_buf
* pointer to an input buffer descriptor (isvce_inp_buf_t)
* NOTE(review): how this buffer is used cannot be seen from the declaration;
* confirm direction and usage against the definition
*
* @return success or failure error code
*
******************************************************************************
*/
IH264E_ERROR_T isvce_generate_sps_pps(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp_buf);
/**
*******************************************************************************
*
* @brief Initializes the entropy context of a job.
*
* @par Description:
* The entropy context associated with a job has to be initialized before
* entropy coding is invoked for it. This involves the start mb address, the
* end mb address, the slice index and the pointers to the locations at which
* the mb residue info and mb header info are packed.
*
* @param[in] ps_proc
* Pointer to the current process context
*
* @returns error status
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_init_entropy_ctxt(isvce_process_ctxt_t *ps_proc);
/**
*******************************************************************************
*
* @brief Entry point for entropy coding
*
* @par Description
* This function calls lower level functions to perform entropy coding for a
* group (n rows) of mbs. After encoding one row of mbs, the function takes
* back control, updates the context and calls the lower level functions
* again. This is repeated until all the rows or the group of mbs (whichever
* is smaller) are coded.
*
* @param[in] ps_proc
* process context
*
* @returns error status
*
* @remarks
* NOTE : It is assumed that this routine is invoked at the start of a slice,
* so the slice header is generated by default.
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_entropy(isvce_process_ctxt_t *ps_proc);
/**
*******************************************************************************
*
* @brief Packs the header information of an mb into a buffer
*
* @par Description:
* After the mode info of a macroblock has been decided, the syntax elements
* associated with the mb are packed and stored. The entropy thread unpacks
* this buffer and generates the final bit stream.
*
* @param[in] ps_proc
* Pointer to the current process context
*
* @returns error status
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_pack_header_data(isvce_process_ctxt_t *ps_proc);
/**
*******************************************************************************
*
* @brief Updates the process context after encoding an mb. This involves
* preserving the current mb information for later use and initializing the
* proc ctxt elements needed to encode the next mb.
*
* @par Description:
* This function performs housekeeping tasks after encoding an mb.
* After encoding an mb, various elements of the process context need to be
* updated to encode the next mb. For instance, the source, recon and reference
* pointers and the mb indices have to be advanced to the next mb. The slice
* index of the current mb needs to be updated. If mb qp modulation is enabled
* and the qp changes, the quant param structure needs to be updated. Also,
* when encoding the next mb, the current mb info is used as part of mode
* prediction or mv prediction. Hence the current mb info has to be preserved
* at the top/top left/left locations.
*
* @param[in] ps_proc
* Pointer to the current process context
*
* @returns WORD32 value
* NOTE(review): this text previously read "none" although the function
* returns WORD32; the meaning of the value is not visible from the
* declaration - confirm against the definition
*
* @remarks none
*
*******************************************************************************
*/
WORD32 isvce_update_proc_ctxt(isvce_process_ctxt_t *ps_proc);
/**
*******************************************************************************
*
* @brief Initializes the process context.
*
* @par Description:
* Before the current job is dispatched to a process thread, the process
* context associated with the job is initialized. Usually every job aims to
* encode one row of mbs. Based on the row indices provided by the job, the
* process context's buffer pointers, slice indices and other elements that
* are necessary during core coding are initialized.
*
* @param[in] ps_proc
* Pointer to the current process context
*
* @returns error status
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_init_proc_ctxt(isvce_process_ctxt_t *ps_proc);
/**
*******************************************************************************
*
* @brief This function performs luma & chroma padding
*
* @par Description:
*
* @param[in] ps_proc
* Process context corresponding to the job
*
* @param[in] pu1_curr_pic_luma
* Pointer to luma buffer
*
* @param[in] i4_luma_stride
* Stride of the luma buffer
* (presumably the distance between successive rows - TODO confirm units)
*
* @param[in] pu1_curr_pic_chroma
* Pointer to chroma buffer
*
* @param[in] i4_chroma_stride
* Stride of the chroma buffer
* (presumably the distance between successive rows - TODO confirm units)
*
* @param[in] i4_mb_x
* mb index x
*
* @param[in] i4_mb_y
* mb index y
*
* @param[in] i4_pad_ht
* number of rows to be padded
*
* @returns error status
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_pad_recon_buffer(isvce_process_ctxt_t *ps_proc, UWORD8 *pu1_curr_pic_luma,
WORD32 i4_luma_stride, UWORD8 *pu1_curr_pic_chroma,
WORD32 i4_chroma_stride, WORD32 i4_mb_x, WORD32 i4_mb_y,
WORD32 i4_pad_ht);
/**
*******************************************************************************
*
* @brief This function performs luma half pel planes generation
*
* @par Description:
*
* @param[in] ps_proc
* Process context corresponding to the job
*
* @param[in] pu1_curr_pic_luma
* Pointer to luma buffer
*
* @param[in] i4_mb_x
* mb index x
*
* @param[in] i4_mb_y
* mb index y
*
* @returns error status
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_halfpel_generation(isvce_process_ctxt_t *ps_proc, UWORD8 *pu1_curr_pic_luma,
WORD32 i4_mb_x, WORD32 i4_mb_y);
/**
*******************************************************************************
*
* @brief This function performs luma & chroma core coding for a set of mbs.
*
* @par Description:
* The mb to be coded is evaluated over a predefined set of modes
* (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with the
* least cost is selected and prediction is carried out using the intra/inter
* prediction filters. The deviation between the src and pred signals
* constitutes the error signal. This error signal is transformed
* (hierarchical transform if necessary) and quantized. The quantized residue
* is packed into the entropy buffer for entropy coding. This is repeated for
* all the mbs listed under the job.
*
* @param[in] ps_proc
* Process context corresponding to the job
*
* @returns error status
*
* @remarks none
*
*******************************************************************************
*/
WORD32 isvce_process(isvce_process_ctxt_t *ps_proc);
/**
*******************************************************************************
*
* @brief
* Entry point of a spawned encoder thread
*
* @par Description:
* The encoder thread dequeues a proc/entropy job from the encoder queue and
* calls the necessary routines.
*
* @param[in] pv_proc
* Process context corresponding to the thread, passed as an untyped pointer
*
* @returns error status
*
* @remarks none
*
*******************************************************************************
*/
WORD32 isvce_process_thread(void *pv_proc);
#endif

View file

@ -0,0 +1,716 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_rate_control.c
*
* @brief
* Contains api function definitions for h264 rate control
*
* @author
* ittiam
*
* @par List of Functions:
* - isvce_rc_init()
* - isvce_rc_get_picture_details()
* - isvce_update_rc_framerates()
* - isvce_update_rc_mb_info()
* - isvce_rc_get_buffer_status()
* - isvce_rc_post_enc()
* - isvce_update_rc_bits_info()
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
#include "ih264_typedefs.h"
#include "irc_datatypes.h"
#include "iv2.h"
#include "ive2.h"
#include "isvce.h"
#include "isvc_defs.h"
#include "isvc_macros.h"
#include "isvc_structs.h"
#include "isvc_trans_quant_itrans_iquant.h"
#include "isvc_inter_pred_filters.h"
#include "isvc_mem_fns.h"
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "isvc_common_tables.h"
#include "isvc_cabac_tables.h"
#include "isvce_defs.h"
#include "isvce_globals.h"
#include "irc_mem_req_and_acq.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "irc_rate_control_api.h"
#include "ih264e_time_stamp.h"
#include "ih264e_modify_frm_rate.h"
#include "isvce_rate_control.h"
#include "ih264e_error.h"
#include "ih264e_time_stamp.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
#include "ime_defs.h"
#include "ime_structs.h"
#include "isvce_cabac_structs.h"
#include "isvce_structs.h"
#include "ih264e_utils.h"
#include "irc_trace_support.h"
/*****************************************************************************/
/* Function Definitions */
/*****************************************************************************/
/**
*******************************************************************************
*
* @brief
* Initializes the rate control library and the frame rate helper contexts
*
* @par Description
* Sets up the frame time, pull down frame rate and time stamp contexts from
* the supplied frame rates, then initializes the rate control library with
* the rate control type, bitrates, buffer delay, frame intervals, qp values
* and the CPB size derived from the encoder level.
*
* @param[in] pv_rc_api
* Handle to rate control api
*
* @param[in] pv_frame_time
* Handle to frame time context
*
* @param[in] pv_time_stamp
* Handle to time stamp context
*
* @param[in] pv_pd_frm_rate
* Handle to pull down frame rate context
*
* @param[in] u4_max_frm_rate
* Maximum frame rate
*
* @param[in] u4_src_frm_rate
* Source frame rate
*
* @param[in] u4_tgt_frm_rate
* Target frame rate
*
* @param[in] e_rate_control_type
* Rate control type
*
* @param[in] u4_avg_bit_rate
* Average bit rate
*
* @param[in] u4_peak_bit_rate
* Peak bit rate; ignored when e_rate_control_type is CBR_NLDRC, in which case
* the average bit rate is used as the peak
*
* @param[in] u4_max_delay
* Maximum buffer delay
*
* @param[in] u4_intra_frame_interval
* Intra frame interval
*
* @param[in] i4_inter_frm_int
* Inter frame interval
*
* @param[in] pu1_init_qp
* Initial qp array [I][P][B]
*
* @param[in] i4_max_inter_frm_int
* Maximum inter frame interval. The value passed by the caller is not used:
* it is replaced by a value derived from i4_inter_frm_int.
*
* @param[in] pu1_min_max_qp
* Array of min/max qp [Imax][Imin][Pmax][Pmin][Bmax][Bmin]
*
* @param[in] u1_profile_level
* Encoder level, used to look up the maximum CPB size
*
* @returns none
*
* @remarks
*
*******************************************************************************
*/
void isvce_rc_init(void *pv_rc_api, void *pv_frame_time, void *pv_time_stamp, void *pv_pd_frm_rate,
                   UWORD32 u4_max_frm_rate, UWORD32 u4_src_frm_rate, UWORD32 u4_tgt_frm_rate,
                   rc_type_e e_rate_control_type, UWORD32 u4_avg_bit_rate, UWORD32 u4_peak_bit_rate,
                   UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, WORD32 i4_inter_frm_int,
                   UWORD8 *pu1_init_qp, WORD32 i4_max_inter_frm_int, UWORD8 *pu1_min_max_qp,
                   UWORD8 u1_profile_level)
{
    /* CPB size: level table entry scaled by 1200 */
    UWORD8 u1_lvl_idx = ih264e_get_lvl_idx(u1_profile_level);
    UWORD32 u4_cpb_size = 1200 * gas_isvc_lvl_tbl[u1_lvl_idx].u4_max_cpb_size;

    /* For CBR the peak rate is pinned to the average rate */
    UWORD32 u4_peak = (e_rate_control_type == CBR_NLDRC) ? u4_avg_bit_rate : u4_peak_bit_rate;
    UWORD32 au4_peak_rates[2];

    UWORD32 u4_floor_bit_rate = 0;
    WORD32 i4_closed_gop = 1;
    UWORD32 u4_ticks_src;
    UWORD32 u4_ticks_tgt;

    /* Same peak rate for both entries: [I][P] */
    au4_peak_rates[0] = u4_peak;
    au4_peak_rates[1] = u4_peak;

    /* Frame time, pull down frame rate and time stamp helpers */
    ih264e_init_frame_time(pv_frame_time, u4_src_frm_rate, u4_tgt_frm_rate);
    ih264e_init_pd_frm_rate(pv_pd_frm_rate, u4_src_frm_rate);
    ih264e_init_time_stamp(pv_time_stamp, u4_max_frm_rate, u4_src_frm_rate);

    u4_ticks_src = ih264e_frame_time_get_src_ticks(pv_frame_time);
    u4_ticks_tgt = ih264e_frame_time_get_tgt_ticks(pv_frame_time);

    /* The caller supplied maximum is discarded and recomputed here */
    if(i4_inter_frm_int == 1)
    {
        i4_max_inter_frm_int = 2;
    }
    else
    {
        i4_max_inter_frm_int = i4_inter_frm_int + 2;
    }

    irc_initialise_rate_control(pv_rc_api,               /* RC handle */
                                e_rate_control_type,     /* RC algo type */
                                0,                       /* MB activity off */
                                u4_avg_bit_rate,         /* Avg bitrate */
                                au4_peak_rates,          /* Peak bitrate [I][P] */
                                u4_floor_bit_rate,       /* Min bitrate */
                                u4_src_frm_rate,         /* Src frame rate */
                                u4_max_delay,            /* Max buffer delay */
                                u4_intra_frame_interval, /* Intra frame interval */
                                i4_inter_frm_int,        /* Inter frame interval */
                                pu1_init_qp,             /* Init QP [I][P][B] */
                                u4_cpb_size,             /* Max VBV/CPB buffer size */
                                i4_max_inter_frm_int,    /* Max inter frame interval */
                                i4_closed_gop,           /* Closed GOP */
                                pu1_min_max_qp,          /* Min-max QP array[6] */
                                0,                       /* I-frame estimated_sad method */
                                u4_ticks_src,            /* Src ticks */
                                u4_ticks_tgt);           /* Tgt ticks */
}
/**
*******************************************************************************
*
* @brief Function to get picture details
*
* @par Description
* Queries the rate control library for the details of the picture and
* returns its type (I/P/B).
*
* @param[in] pv_rc_api
* Handle to Rate control api
*
* @param[out] pi4_pic_id
* Forwarded to irc_get_picture_details(), which is expected to fill in the
* picture id
*
* @param[out] pi4_pic_disp_order_no
* Forwarded to irc_get_picture_details(), which is expected to fill in the
* display order number
*
* @returns
* Picture type; P_PIC if the library leaves the type untouched
*
* @remarks none
*
*******************************************************************************
*/
picture_type_e isvce_rc_get_picture_details(void *pv_rc_api, WORD32 *pi4_pic_id,
                                            WORD32 *pi4_pic_disp_order_no)
{
    /* Pre-set so that a defined value is returned in every case */
    picture_type_e e_pic_type = P_PIC;

    irc_get_picture_details(pv_rc_api, pi4_pic_id, pi4_pic_disp_order_no, &e_pic_type);

    return e_pic_type;
}
/**
*******************************************************************************
*
* @brief Decides whether the current source frame is skipped and updates the
* frame rate seen by rate control
*
* @par Description
* This function is called before queuing the current frame. It advances the
* time stamp context and asks the frame time context whether the current
* input buffer has to be skipped because of a frame rate mismatch. A skipped
* frame is recorded in the pull down context with a frame rate of 0. A frame
* that is kept is recorded with the source frame rate, and the resulting
* average frame rate is passed to the rate control library for bit
* allocation.
*
* @param[in] ps_rate_control_api
* Handle to rate control api
*
* @param[in] ps_pd_frm_rate
* Handle to pull down frm rate context
*
* @param[in] ps_time_stamp
* Handle to time stamp context
*
* @param[in] ps_frame_time
* Handle to frame time context
*
* @returns
* Non-zero if the current frame is to be skipped, 0 if it is to be queued
*
* @remarks
* The skip decision is held in a WORD32, matching the return type, so the
* value obtained from ih264e_should_src_be_skipped() is not narrowed.
*
*******************************************************************************
*/
WORD32 isvce_update_rc_framerates(void *ps_rate_control_api, void *ps_pd_frm_rate,
                                  void *ps_time_stamp, void *ps_frame_time)
{
    WORD32 i4_skip_src = 0;

    /* Output of the skip query; not used after the call */
    UWORD32 u4_src_not_skipped_for_dts = 0;

    /* Update the time stamp for the current frame */
    ih264e_update_time_stamp(ps_time_stamp);

    /* Check if the source frame needs to be skipped */
    i4_skip_src = ih264e_should_src_be_skipped(ps_frame_time, 1, &u4_src_not_skipped_for_dts);

    if(i4_skip_src)
    {
        /* Frames are skipped based on the difference between source and
         * target frame rate: record the missing frame with a rate of 0 */
        ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0);
    }
    else
    {
        WORD32 i4_avg_frm_rate, i4_source_frame_rate;

        i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time);

        /* Record the frame that is present with the source frame rate */
        ih264e_update_pd_frm_rate(ps_pd_frm_rate, i4_source_frame_rate);

        /* Based on the update, get the average frame rate */
        i4_avg_frm_rate = ih264e_get_pd_avg_frm_rate(ps_pd_frm_rate);

        /* Let the RC library allocate bits for the frame rate that is
         * actually achieved */
        irc_change_frm_rate_for_bit_alloc(ps_rate_control_api, i4_avg_frm_rate);
    }

    return (i4_skip_src);
}
/**
*******************************************************************************
*
* @brief Function to update mb info for rate control context
*
* @par Description
* After an mb is encoded, its distortion, qp and (for intra mbs) cost are
* accumulated into the frame info so that rate control can model the frame.
* The statistics are kept separately for intra and inter mbs.
*
* @param[in,out] ps_frame_info
* Handle to frame info context; its per-type sums and counts are updated
*
* @param[in] pv_proc
* Process context, passed as an untyped pointer
*
* @returns none
*
* @remarks
*
*******************************************************************************
*/
void isvce_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc)
{
    isvce_process_ctxt_t *ps_proc = pv_proc;

    WORD32 i4_is_intra = (0 != ps_proc->ps_mb_info->u1_is_intra);

    /* Statistics slot: 0 for intra mbs, 1 for inter mbs */
    WORD32 i4_slot = i4_is_intra ? 0 : 1;

    /* Distortion, qp (mapped through the h264 to mpeg2 qp table) and count */
    ps_frame_info->tot_mb_sad[i4_slot] += ps_proc->i4_mb_distortion;
    ps_frame_info->qp_sum[i4_slot] += gau1_h264_to_mpeg2_qmap[ps_proc->u1_mb_qp];
    ps_frame_info->num_mbs[i4_slot]++;

    /* Cost is accumulated for intra mbs only */
    if(i4_is_intra)
    {
        ps_frame_info->intra_mb_cost_sum += ps_proc->i4_mb_cost;
    }
}
/**
*******************************************************************************
*
* @brief Function to get rate control buffer status
*
* @par Description
* Queries the rate control library for the VBV buffer status and converts it
* into two encoder buffer flags.
*
* @param[in] pv_rc_api
* Handle to rate control api context
*
* @param[in] i4_total_frame_bits
* Total frame bits
*
* @param[in] e_pic_type
* Picture type
*
* @param pi4_num_bits_to_prevent_vbv_underflow
* Forwarded unchanged to irc_get_buffer_status()
* (presumably filled in by that call - TODO confirm)
*
* @param[out] pu1_is_enc_buf_overflow
* Set to 1 when the status is VBV_UNDERFLOW, 0 otherwise
*
* @param[out] pu1_is_enc_buf_underflow
* Set to 1 when the status is VBV_OVERFLOW, 0 otherwise
*
* @returns none
*
* @remarks
* The crossed mapping is intentional: the caller isvce_rc_post_enc() treats
* the encoder buffer overflow flag as "decoder buffer is underflowing" (frame
* skip) and the encoder buffer underflow flag as the stuffing case.
*
*******************************************************************************
*/
void isvce_rc_get_buffer_status(void *pv_rc_api, WORD32 i4_total_frame_bits,
                                picture_type_e e_pic_type,
                                WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
                                UWORD8 *pu1_is_enc_buf_overflow, UWORD8 *pu1_is_enc_buf_underflow)
{
    vbv_buf_status_e e_status = irc_get_buffer_status(pv_rc_api, i4_total_frame_bits, e_pic_type,
                                                      pi4_num_bits_to_prevent_vbv_underflow);

    /* VBV overflow <=> encoder buffer underflow, and vice versa */
    *pu1_is_enc_buf_underflow = (VBV_OVERFLOW == e_status) ? 1 : 0;
    *pu1_is_enc_buf_overflow = (VBV_UNDERFLOW == e_status) ? 1 : 0;
}
/**
*******************************************************************************
*
* @brief Function to update rate control module after encoding
*
* @par Description
* This function is used to update the rate control module after the current
* frame encoding is done, with details such as bits consumed, SAD and qp sums
* per mb type, intra cost, mb counts and average activity. For CBR_NLDRC it
* also checks the buffer status to decide on frame skip and bit stuffing.
*
* @param[in] ps_rate_control_api
* Handle to rate control api context
*
* @param[in] ps_frame_info
* Handle to frame info context
*
* @param[in] ps_pd_frm_rate
* Handle to pull down frame rate context (unused)
*
* @param[in] ps_time_stamp
* Handle to time stamp context (unused)
*
* @param[in] ps_frame_time
* Handle to frame time context (unused)
*
* @param[in] i4_total_mb_in_frame
* Total mb in frame
*
* @param[in] pe_vop_coding_type
* Picture coding type; only element [0] is read, it is never written
*
* @param[in] i4_is_first_frame
* Is first frame. Read only when DISABLE_POST_ENC_SKIP is 0.
*
* @param[out] pi4_is_post_encode_skip
* Post encoding skip flag. Cleared to 0 on entry and set to 1 when the frame
* is skipped in the DISABLE_POST_ENC_SKIP == 0 path.
*
* @param[in] u1_frame_qp
* Frame qp (unused)
*
* @param[in,out] pi4_num_intra_in_prev_frame
* Number of intra mbs in previous frame. Read on entry for scene change
* detection, then updated with the count of this frame (0 for an I picture).
*
* @param[out] pi4_avg_activity
* Average activity
*
* @param[out] u1_is_post_enc_skip
* Present only when ENABLE_RE_ENC_AS_SKIP is non-zero. Set to 1 when the
* buffer check requests a skip for a non I picture; this function never
* clears it.
*
* @returns
* Number of stuffing bytes (stuffing bits >> 3); 0 when nothing is stuffed
*
* @remarks
*
*******************************************************************************
*/
WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
WORD32 *pi4_avg_activity
#if ENABLE_RE_ENC_AS_SKIP
,
UWORD8 *u1_is_post_enc_skip
#endif
)
{
/* Variables for the update_frm_level_info */
WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE];
WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0};
WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0};
WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0};
WORD32 i4_total_frame_bits = 0;
WORD32 i4_total_hdr_bits = 0;
WORD32 i4_total_texturebits;
WORD32 i4_avg_mb_activity = 0;
WORD32 i4_intra_frm_cost = 0;
UWORD8 u1_is_scd = 0;
WORD32 i4_cbr_bits_to_stuff = 0;
/* Snapshot of the previous frame's intra mb count, taken before it is
 * overwritten further below */
UWORD32 u4_num_intra_in_prev_frame = *pi4_num_intra_in_prev_frame;
UNUSED(ps_pd_frm_rate);
UNUSED(ps_time_stamp);
UNUSED(ps_frame_time);
UNUSED(u1_frame_qp);
/* i4_is_first_frame is read only when DISABLE_POST_ENC_SKIP is 0 */
UNUSED(i4_is_first_frame);
/* Accumulate RC stats */
ai4_tot_mb_in_type[MB_TYPE_INTRA] = irc_fi_get_total_mb(ps_frame_info, MB_TYPE_INTRA);
ai4_tot_mb_in_type[MB_TYPE_INTER] = irc_fi_get_total_mb(ps_frame_info, MB_TYPE_INTER);
ai4_tot_mb_type_qp[MB_TYPE_INTRA] = irc_fi_get_total_mb_qp(ps_frame_info, MB_TYPE_INTRA);
ai4_tot_mb_type_qp[MB_TYPE_INTER] = irc_fi_get_total_mb_qp(ps_frame_info, MB_TYPE_INTER);
ai4_mb_type_sad[MB_TYPE_INTRA] = irc_fi_get_total_mb_sad(ps_frame_info, MB_TYPE_INTRA);
ai4_mb_type_sad[MB_TYPE_INTER] = irc_fi_get_total_mb_sad(ps_frame_info, MB_TYPE_INTER);
i4_intra_frm_cost = irc_fi_get_total_intra_mb_cost(ps_frame_info);
i4_avg_mb_activity = irc_fi_get_avg_activity(ps_frame_info);
i4_total_hdr_bits = irc_fi_get_total_header_bits(ps_frame_info);
i4_total_texturebits = irc_fi_get_total_mb_texture_bits(ps_frame_info, MB_TYPE_INTRA);
i4_total_texturebits += irc_fi_get_total_mb_texture_bits(ps_frame_info, MB_TYPE_INTER);
i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits;
*pi4_avg_activity = i4_avg_mb_activity;
/* Texture bits are not reported per mb type: all texture bits (total bits
 * minus header bits) are attributed to the inter entry */
ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0;
ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits;
/* Set post encode skip to zero */
pi4_is_post_encode_skip[0] = 0;
/* For NLDRC, get the buffer status for stuffing or skipping */
if(irc_get_rc_type(ps_rate_control_api) == CBR_NLDRC)
{
WORD32 i4_get_num_bit_to_prevent_vbv_overflow;
UWORD8 u1_enc_buf_overflow, u1_enc_buf_underflow;
/* Getting the buffer status */
isvce_rc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits, pe_vop_coding_type[0],
&i4_get_num_bit_to_prevent_vbv_overflow, &u1_enc_buf_overflow,
&u1_enc_buf_underflow);
/* We skip the frame if decoder buffer is underflowing. But we never skip
 * first I frame */
#if !DISABLE_POST_ENC_SKIP
if((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 1))
{
irc_post_encode_frame_skip(ps_rate_control_api, (picture_type_e) pe_vop_coding_type[0]);
/* A skipped frame contributes no bits */
i4_total_frame_bits = 0;
*pi4_is_post_encode_skip = 1;
/* Adjust the GOP if in case we skipped an I-frame */
if(*pe_vop_coding_type == I_PIC) irc_force_I_frame(ps_rate_control_api);
/* The picture type is left unchanged for a skipped frame */
/* Getting the buffer status again,to check if it underflows */
irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
(picture_type_e) pe_vop_coding_type[0],
&i4_get_num_bit_to_prevent_vbv_overflow);
}
#endif
#if ENABLE_RE_ENC_AS_SKIP
/* Check for VBV constraints - post encode skip */
if(u1_enc_buf_overflow == 1 && (pe_vop_coding_type[0] != I_PIC))
{
*u1_is_post_enc_skip = 1;
/* Fold all intra statistics into the inter entries and drop every bit
 * count, so that RC is updated as for a frame without intra mbs or bits */
ai4_tot_mb_in_type[MB_TYPE_INTER] += ai4_tot_mb_in_type[MB_TYPE_INTRA];
ai4_tot_mb_in_type[MB_TYPE_INTRA] = 0;
ai4_tot_mb_type_qp[MB_TYPE_INTER] += ai4_tot_mb_type_qp[MB_TYPE_INTRA];
ai4_tot_mb_type_qp[MB_TYPE_INTRA] = 0;
ai4_mb_type_sad[MB_TYPE_INTER] += ai4_mb_type_sad[MB_TYPE_INTRA];
ai4_mb_type_sad[MB_TYPE_INTRA] = 0;
i4_intra_frm_cost = 0;
i4_total_hdr_bits = 0;
i4_total_texturebits = 0;
i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits;
ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0;
ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits;
/* Getting the buffer status again,to check if it underflows */
irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
(picture_type_e) pe_vop_coding_type[0],
&i4_get_num_bit_to_prevent_vbv_overflow);
}
#endif
/* In this case we stuff bytes as buffer is overflowing */
if(u1_enc_buf_underflow == 1)
{
/* The stuffing function is directly pulled out from split controller
workspace. encode_vop_data() function makes sure alignment data is
dumped at the end of a frame. Split controller was identifying this
alignment byte, overwriting it with the stuff data and then finally
aligning the buffer. Here every thing is inside the DSP. So, ideally
encode_vop_data needn't align, and we can start stuffing directly. But
in that case, it'll break the logic for a normal frame. Hence for
simplicity, not changing this part since it is ok to align and then
overwrite since stuffing is not done for every frame */
i4_cbr_bits_to_stuff = irc_get_bits_to_stuff(ps_rate_control_api, i4_total_frame_bits,
pe_vop_coding_type[0]);
/* Just add extra 32 bits to make sure we don't stuff lesser */
i4_cbr_bits_to_stuff += 32;
/* NOTE(review): no check against the output buffer size is made here */
/* Add stuffed bits to total bits */
i4_total_frame_bits += i4_cbr_bits_to_stuff;
}
}
/* Scene change is assumed for a P picture when the number of intra mbs is
 * more than 2/3rd of the total mbs and more than 11/10 of the previous
 * frame's intra mb count */
if((ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((2 * i4_total_mb_in_frame) / 3)) &&
(*pe_vop_coding_type == P_PIC) &&
(ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((11 * (WORD32) u4_num_intra_in_prev_frame) / 10)))
{
u1_is_scd = 1;
}
/* Update num intra mbs of this frame */
if(pi4_is_post_encode_skip[0] == 0)
{
*pi4_num_intra_in_prev_frame = ai4_tot_mb_in_type[MB_TYPE_INTRA];
}
/* Reset intra count to zero when an I frame is encountered */
if(*pe_vop_coding_type == I_PIC)
{
*pi4_num_intra_in_prev_frame = 0;
}
/* Do an update of rate control after post encode */
irc_update_frame_level_info(ps_rate_control_api, /* RC state */
pe_vop_coding_type[0], /* PIC type */
ai4_mb_type_sad, /* SAD for [Intra/Inter] */
i4_total_frame_bits, /* Total frame bits */
i4_total_hdr_bits, /* Header bits of the frame */
ai4_mb_type_tex_bits, /* Texture bits for MB[Intra/Inter] */
ai4_tot_mb_type_qp, /* QP sum for MB[Intra/Inter] */
ai4_tot_mb_in_type, /* MB count for MB[Intra/Inter] */
i4_avg_mb_activity, /* Average mb activity in frame */
u1_is_scd, /* Is a scene change detected */
0, /* Pre encode skip */
(WORD32) i4_intra_frm_cost, /* Intra cost for frame */
0); /* Not done outside */
/* Stuffing is reported in bytes */
return (i4_cbr_bits_to_stuff >> 3);
}
/**
*******************************************************************************
*
* @brief Function to update bits consumed info to rate control context
*
* @par Description
* Adds the header and residue bit counts held in the entropy context to the
* header and texture bit totals of the frame info, separately for intra and
* inter mbs.
*
* @param[in,out] ps_frame_info
* Frame info context whose bit totals are incremented
*
* @param[in] pv_entropy
* Entropy context, passed as an untyped pointer
*
* @returns none
*
* @remarks
*
*******************************************************************************
*/
void isvce_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy)
{
    /* Intra first, then inter */
    static const WORD32 ai4_mb_types[2] = {MB_TYPE_INTRA, MB_TYPE_INTER};

    isvce_entropy_ctxt_t *ps_entropy = pv_entropy;
    WORD32 i;

    for(i = 0; i < 2; i++)
    {
        WORD32 i4_type = ai4_mb_types[i];

        ps_frame_info->mb_header_bits[i4_type] += ps_entropy->u4_header_bits[i4_type];
        ps_frame_info->mb_texture_bits[i4_type] += ps_entropy->u4_residue_bits[i4_type];
    }
}

View file

@ -0,0 +1,330 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_rate_control.h
*
* @brief
* This file contains function declarations of api functions for h264 rate
* control
*
* @author
* ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_RATE_CONTROL_H_
#define _ISVCE_RATE_CONTROL_H_
#if ENABLE_RE_ENC_AS_SKIP
#include "isvce_structs.h"
#endif
/*****************************************************************************/
/* Function Declarations */
/*****************************************************************************/
/**
*******************************************************************************
*
* @brief
* This function initializes rate control context and variables
*
* @par Description
* This function initializes rate control type, source and target frame rate,
* average and peak bitrate, intra-inter frame interval and initial
* quantization parameter
*
* @param[in] pv_rc_api
* Handle to rate control api
*
* @param[in] pv_frame_time
* Handle to frame time context
*
* @param[in] pv_time_stamp
* Handle to time stamp context
*
* @param[in] pv_pd_frm_rate
* Handle to pull down frame rate context
*
* @param[in] u4_max_frm_rate
* Maximum frame rate
*
* @param[in] u4_src_frm_rate
* Source frame rate
*
* @param[in] u4_tgt_frm_rate
* Target frame rate
*
* @param[in] e_rate_control_type
* Rate control type
*
* @param[in] u4_avg_bit_rate
* Average bit rate
*
* @param[in] u4_peak_bit_rate
* Peak bit rate
*
* @param[in] u4_max_delay
* Maximum buffer delay
*
* @param[in] u4_intra_frame_interval
* Intra frame interval
*
* @param[in] i4_inter_frm_int
* Inter frame interval
*
* @param[in] pu1_init_qp
* Initial qp
*
* @param[in] i4_max_inter_frm_int
* Maximum inter frame interval
* NOTE: the definition in isvce_rate_control.c replaces this argument with a
* value derived from i4_inter_frm_int, so the value passed in has no effect
*
* @param[in] pu1_min_max_qp
* Array of min/max qp
*
* @param[in] u1_profile_level
* Encoder profile level
*
* @returns none
*
* @remarks
*
*******************************************************************************
*/
void isvce_rc_init(void *pv_rc_api, void *pv_frame_time, void *pv_time_stamp, void *pv_pd_frm_rate,
UWORD32 u4_max_frm_rate, UWORD32 u4_src_frm_rate, UWORD32 u4_tgt_frm_rate,
rc_type_e e_rate_control_type, UWORD32 u4_avg_bit_rate, UWORD32 u4_peak_bit_rate,
UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, WORD32 i4_inter_frm_int,
UWORD8 *pu1_init_qp, WORD32 i4_max_inter_frm_int, UWORD8 *pu1_min_max_qp,
UWORD8 u1_profile_level);
/**
*******************************************************************************
*
* @brief Function to get picture details
*
* @par Description
* This function returns the Picture type(I/P/B)
*
* @param[in] pv_rc_api
* Handle to Rate control api
*
* @param pi4_pic_id
* Pointer to picture id (presumably written by this function - TODO confirm
* against the definition)
*
* @param pi4_pic_disp_order_no
* Pointer to picture display order number (presumably written by this
* function - TODO confirm against the definition)
*
* @returns
* Picture type
*
* @remarks none
*
*******************************************************************************
*/
picture_type_e isvce_rc_get_picture_details(void *pv_rc_api, WORD32 *pi4_pic_id,
WORD32 *pi4_pic_disp_order_no);
/**
*******************************************************************************
*
* @brief Function to set frame rate inside RC.
*
* @par Description
* This function is called before encoding the current frame and gets the qp
* for the current frame from rate control module
*
* @param[in] ps_rate_control_api
* Handle to rate control api
*
* @param[in] ps_pd_frm_rate
* Handle to pull down frm rate context
*
* @param[in] ps_time_stamp
* Handle to time stamp context
*
* @param[in] ps_frame_time
* Handle to frame time context
*
* @returns
* Skip or encode the current frame
*
* @remarks
* NOTE(review): the brief says this function sets frame rates inside RC,
* while the description talks about fetching the frame qp and the return
* text talks about a skip/encode decision. The three statements do not agree
* with each other; confirm against the definition which one is accurate.
*
*******************************************************************************
*/
WORD32 isvce_update_rc_framerates(void *ps_rate_control_api, void *ps_pd_frm_rate,
void *ps_time_stamp, void *ps_frame_time);
/**
*******************************************************************************
*
* @brief Function to update mb info for rate control context
*
* @par Description
* After encoding a mb, information such as mb type, qp used, mb distortion
* resulted in encoding the block and so on needs to be preserved for modelling
* RC. This is preserved via this function call.
*
* @param[in] ps_frame_info
* Handle Frame info context
*
* @param[in] pv_proc
* Process context, passed as an untyped pointer
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc);
/**
*******************************************************************************
*
* @brief Function to get rate control buffer status
*
* @par Description
* This function is used to get buffer status(underflow/overflow) by rate
* control module
*
* @param[in] pv_rc_api
* Handle to rate control api context
*
* @param[in] i4_total_frame_bits
* Total frame bits
*
* @param[in] e_pic_type
* Picture type
*
* @param[in] pi4_num_bits_to_prevent_vbv_underflow
* Number of bits to prevent underflow (passed by pointer; whether the callee
* reads or writes it is not visible here - TODO confirm)
*
* @param[out] pu1_is_enc_buf_overflow
* Buffer overflow indication flag
*
* @param[out] pu1_is_enc_buf_underflow
* Buffer underflow indication flag
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_rc_get_buffer_status(void *pv_rc_api, WORD32 i4_total_frame_bits,
picture_type_e e_pic_type,
WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
UWORD8 *pu1_is_enc_buf_overflow, UWORD8 *pu1_is_enc_buf_underflow);
/**
*******************************************************************************
*
* @brief Function to update rate control module after encoding
*
* @par Description
* This function is used to update the rate control module after the current
* frame encoding is done with details such as bits consumed, SAD for I/P/B,
* intra cost ,mb type and other
*
* @param[in] ps_rate_control_api
* Handle to rate control api context
*
* @param[in] ps_frame_info
* Handle to frame info context
*
* @param[in] ps_pd_frm_rate
* Handle to pull down frame rate context
*
* @param[in] ps_time_stamp
* Handle to time stamp context
*
* @param[in] ps_frame_time
* Handle to frame time context
*
* @param[in] i4_total_mb_in_frame
* Total mb in frame
*
* @param[in] pe_vop_coding_type
* Picture coding type
*
* @param[in] i4_is_first_frame
* Is first frame
*
* @param[in] pi4_is_post_encode_skip
* Post encoding skip flag
*
* @param[in] u1_frame_qp
* Frame qp
*
* @param[in] pi4_num_intra_in_prev_frame
* Number of intra mbs in previous frame
*
* @param[in] pi4_avg_activity
* Average activity
*
* @param u1_is_post_enc_skip
* Pointer to a post encode skip flag. This parameter is part of the
* signature only when ENABLE_RE_ENC_AS_SKIP is non-zero. (How it differs
* from pi4_is_post_encode_skip is not visible here - TODO confirm)
*
* @returns
* A WORD32 value; its meaning is not visible from this declaration
*
* @remarks
* Two prototypes are provided; the one in effect is chosen at compile time
* by ENABLE_RE_ENC_AS_SKIP.
*
*******************************************************************************
*/
#if ENABLE_RE_ENC_AS_SKIP
WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
WORD32 *pi4_avg_activity, UWORD8 *u1_is_post_enc_skip);
#else
WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
WORD32 *pi4_avg_activity);
#endif
/**
*******************************************************************************
*
* @brief Function to update bits consumed info to rate control context
*
* @par Description
* Function to update bits consume info to rate control context
*
* @param[in] ps_frame_info
* Frame info context
*
* @param[in] pv_entropy
* Entropy context, passed as an untyped pointer
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
void isvce_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy);
#endif

View file

@ -0,0 +1,325 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file
* isvce_rc_mem_interface.c
*
* @brief
* This file contains api function definitions for rate control memtabs
*
* @author
* ittiam
*
* List of Functions
* - isvce_map_rc_mem_recs_to_itt_api()
* - isvce_map_itt_mem_rec_to_rc_mem_rec()
* - isvce_get_rate_control_mem_tab()
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System include files */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <math.h>
/* User Include Files */
#include "ih264e_config.h"
#include "ih264_typedefs.h"
#include "ih264_size_defs.h"
#include "iv2.h"
#include "ive2.h"
#include "ime_distortion_metrics.h"
#include "ime_defs.h"
#include "ime_structs.h"
#include "isvce.h"
#include "ithread.h"
#include "isvc_defs.h"
#include "ih264_debug.h"
#include "isvc_macros.h"
#include "ih264_platform_macros.h"
#include "ih264_error.h"
#include "isvc_structs.h"
#include "isvc_trans_quant_itrans_iquant.h"
#include "isvc_inter_pred_filters.h"
#include "isvc_mem_fns.h"
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "isvc_common_tables.h"
#include "ih264_list.h"
#include "isvc_cabac_tables.h"
#include "ih264e_error.h"
#include "isvce_defs.h"
#include "ih264e_bitstream.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "isvce_rate_control.h"
#include "isvce_cabac_structs.h"
#include "isvce_structs.h"
#include "ih264e_master.h"
#include "ih264_buf_mgr.h"
#include "ih264_dpb_mgr.h"
#include "isvce_utils.h"
#include "ih264e_platform_macros.h"
#include "ih264_cavlc_tables.h"
#include "ih264e_statistics.h"
#include "ih264e_trace.h"
#include "ih264e_fmt_conv.h"
#include "isvce_cavlc.h"
#include "ih264e_rc_mem_interface.h"
#include "isvce_rc_mem_interface.h"
#include "ih264e_time_stamp.h"
#include "irc_common.h"
#include "irc_rd_model.h"
#include "irc_est_sad.h"
#include "irc_fixed_point_error_bits.h"
#include "irc_vbr_storage_vbv.h"
#include "irc_picture_type.h"
#include "irc_bit_allocation.h"
#include "irc_mb_model_based.h"
#include "irc_cbr_buffer_control.h"
#include "irc_vbr_str_prms.h"
#include "irc_rate_control_api.h"
#include "irc_rate_control_api_structs.h"
#include "ih264e_modify_frm_rate.h"
/*****************************************************************************/
/* Function Definitions */
/*****************************************************************************/
/**
******************************************************************************
*
* @brief
*  Publishes rate control memtab requirements into encoder lib mem records
*
* @par Description
*  For each of the first num_mem_recs entries, the size and alignment held in
*  the rc memtab are handed to FILL_MEMTAB for the encoder lib mem record at
*  the same index. Every record is requested with the memory type
*  IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM.
*
* @param[out] ps_mem
*  pointer to encoder lib mem records
*
* @param[in] rc_memtab
*  pointer to rc mem records
*
* @param[in] num_mem_recs
*  number of memory records
*
* @return void
*
******************************************************************************
*/
static void isvce_map_rc_mem_recs_to_itt_api(iv_mem_rec_t *ps_mem, itt_memtab_t *rc_memtab,
                                             UWORD32 num_mem_recs)
{
    UWORD32 u4_rec_idx;

    for(u4_rec_idx = 0; u4_rec_idx < num_mem_recs; u4_rec_idx++)
    {
        const itt_memtab_t *ps_rc_rec = &rc_memtab[u4_rec_idx];
        UWORD32 u4_rec_size = ps_rc_rec->u4_size;
        UWORD32 u4_rec_align = ps_rc_rec->i4_alignment;

        /* only external persistent cacheable memory is ever requested */
        FILL_MEMTAB(ps_mem, u4_rec_idx, u4_rec_size, u4_rec_align,
                    IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM);
    }
}
/**
*******************************************************************************
*
* @brief
*  Copies encoder lib mem records into rate control mem records
*
* @par Description
*  For each of the first num_mem_recs entries, the base pointer, size and
*  alignment of the encoder lib mem record are copied to the rc mem record at
*  the same index. The rc record is always tagged with the DDR region and
*  PERSISTENT usage.
*
* @param[in] ps_mem
*  pointer to encoder lib mem records
*
* @param[out] rc_memtab
*  pointer to rc mem records
*
* @param[in] num_mem_recs
*  Number of memory records
*
* @returns none
*
* @remarks none
*
*******************************************************************************
*/
static void isvce_map_itt_mem_rec_to_rc_mem_rec(iv_mem_rec_t *ps_mem, itt_memtab_t *rc_memtab,
                                                UWORD32 num_mem_recs)
{
    UWORD32 u4_rec_idx = 0;

    while(u4_rec_idx < num_mem_recs)
    {
        const iv_mem_rec_t *ps_src = &ps_mem[u4_rec_idx];
        itt_memtab_t *ps_dst = &rc_memtab[u4_rec_idx];

        ps_dst->i4_alignment = ps_src->u4_mem_alignment;
        ps_dst->u4_size = ps_src->u4_mem_size;
        ps_dst->pv_base = ps_src->pv_base;

        /* DDR is the only memory region on offer */
        ps_dst->e_mem_region = DDR;
        ps_dst->e_usage = PERSISTENT;

        u4_rec_idx++;
    }
}
/**
******************************************************************************
*
* @brief
*  Get memtabs for rate control
*
* @par Description
*  Walks the rate control sub-modules in a fixed order: frame time, time
*  stamp, pull down frame rate, and then one rate control api instance for
*  each of the three slots of apps_rate_control_api. For every sub-module the
*  same four steps are performed:
*   1. the number of memtabs is queried with GET_NUM_MEMTAB
*   2. that many encoder lib mem records are copied into the local rc memtabs
*   3. the sub-module is invoked with the caller's e_func_type
*   4. the resulting rc memtabs are mapped back to the encoder lib mem records
*  The record offset then advances by the count returned in step 3.
*
* @param[in] pv_rate_control
*  pointer to rate control context (handle). Its module handles are used only
*  when e_func_type is USE_BASE or FILL_BASE; otherwise local scratch handles
*  are passed to the sub-modules.
*
* @param[in,out] ps_mem
*  pointer to encoder lib mem records
*
* @param[in] e_func_type
*  enum that dictates fill memory records or Init memory records
*
* @return total number of mem records
*
******************************************************************************
*/
WORD32 isvce_get_rate_control_mem_tab(void *pv_rate_control, iv_mem_rec_t *ps_mem,
                                      ITT_FUNC_TYPE_E e_func_type)
{
    itt_memtab_t as_itt_memtab[NUM_SVCE_RC_MEMTABS];
    isvce_rate_control_ctxt_t *ps_rate_control = pv_rate_control;
    void *apv_scratch_handle[RC_MEM_CNT];
    void **appv_handle[RC_MEM_CNT];
    iv_mem_rec_t *ps_cur_mem;
    itt_memtab_t *ps_cur_memtab;
    WORD32 i4_module_recs = 0;
    WORD32 i4_rec_offset = 0;
    WORD32 i;

    /* By default every handle slot refers to local scratch storage */
    for(i = 0; i < RC_MEM_CNT; i++)
    {
        appv_handle[i] = &apv_scratch_handle[i];
    }

    /* For the base modes, the handles kept in the context are used instead */
    if((USE_BASE == e_func_type) || (FILL_BASE == e_func_type))
    {
        appv_handle[RC_MEM_FRAME_TIME] = &ps_rate_control->pps_frame_time;
        appv_handle[RC_MEM_TIME_STAMP] = &ps_rate_control->pps_time_stamp;
        appv_handle[RC_MEM_FRAME_RATE] = &ps_rate_control->pps_pd_frm_rate;
        appv_handle[RC_MEM_API_L0] = &ps_rate_control->apps_rate_control_api[0];
        appv_handle[RC_MEM_API_L1] = &ps_rate_control->apps_rate_control_api[1];
        appv_handle[RC_MEM_API_L2] = &ps_rate_control->apps_rate_control_api[2];
    }

    /* ---------------------------- Frame time module ---------------------- */
    ps_cur_mem = ps_mem + i4_rec_offset;
    ps_cur_memtab = &as_itt_memtab[i4_rec_offset];
    /* Number of memtabs used by the module */
    i4_module_recs = ih264e_frame_time_get_init_free_memtab(
        (frame_time_t **) appv_handle[RC_MEM_FRAME_TIME], NULL, GET_NUM_MEMTAB);
    /* Encoder lib records -> rc memtabs */
    isvce_map_itt_mem_rec_to_rc_mem_rec(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    /* Let the module act on its memtabs */
    i4_module_recs = ih264e_frame_time_get_init_free_memtab(
        (frame_time_t **) appv_handle[RC_MEM_FRAME_TIME], ps_cur_memtab, e_func_type);
    /* rc memtabs -> encoder lib records */
    isvce_map_rc_mem_recs_to_itt_api(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_rec_offset += i4_module_recs;

    /* ---------------------------- Time stamp module ---------------------- */
    ps_cur_mem = ps_mem + i4_rec_offset;
    ps_cur_memtab = &as_itt_memtab[i4_rec_offset];
    i4_module_recs = ih264e_time_stamp_get_init_free_memtab(
        (time_stamp_t **) appv_handle[RC_MEM_TIME_STAMP], NULL, GET_NUM_MEMTAB);
    isvce_map_itt_mem_rec_to_rc_mem_rec(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_module_recs = ih264e_time_stamp_get_init_free_memtab(
        (time_stamp_t **) appv_handle[RC_MEM_TIME_STAMP], ps_cur_memtab, e_func_type);
    isvce_map_rc_mem_recs_to_itt_api(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_rec_offset += i4_module_recs;

    /* ---------------------------- Frame rate module ---------------------- */
    ps_cur_mem = ps_mem + i4_rec_offset;
    ps_cur_memtab = &as_itt_memtab[i4_rec_offset];
    i4_module_recs = ih264e_pd_frm_rate_get_init_free_memtab(
        (pd_frm_rate_t **) appv_handle[RC_MEM_FRAME_RATE], NULL, GET_NUM_MEMTAB);
    isvce_map_itt_mem_rec_to_rc_mem_rec(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_module_recs = ih264e_pd_frm_rate_get_init_free_memtab(
        (pd_frm_rate_t **) appv_handle[RC_MEM_FRAME_RATE], ps_cur_memtab, e_func_type);
    isvce_map_rc_mem_recs_to_itt_api(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_rec_offset += i4_module_recs;

    /* ---------------------------- Rate controller, slot 0 ---------------- */
    ps_cur_mem = ps_mem + i4_rec_offset;
    ps_cur_memtab = &as_itt_memtab[i4_rec_offset];
    i4_module_recs = irc_rate_control_num_fill_use_free_memtab(
        (rate_control_api_t **) appv_handle[RC_MEM_API_L0], NULL, GET_NUM_MEMTAB);
    isvce_map_itt_mem_rec_to_rc_mem_rec(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_module_recs = irc_rate_control_num_fill_use_free_memtab(
        (rate_control_api_t **) appv_handle[RC_MEM_API_L0], ps_cur_memtab, e_func_type);
    isvce_map_rc_mem_recs_to_itt_api(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_rec_offset += i4_module_recs;

    /* ---------------------------- Rate controller, slot 1 ---------------- */
    ps_cur_mem = ps_mem + i4_rec_offset;
    ps_cur_memtab = &as_itt_memtab[i4_rec_offset];
    i4_module_recs = irc_rate_control_num_fill_use_free_memtab(
        (rate_control_api_t **) appv_handle[RC_MEM_API_L1], NULL, GET_NUM_MEMTAB);
    isvce_map_itt_mem_rec_to_rc_mem_rec(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_module_recs = irc_rate_control_num_fill_use_free_memtab(
        (rate_control_api_t **) appv_handle[RC_MEM_API_L1], ps_cur_memtab, e_func_type);
    isvce_map_rc_mem_recs_to_itt_api(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_rec_offset += i4_module_recs;

    /* ---------------------------- Rate controller, slot 2 ---------------- */
    ps_cur_mem = ps_mem + i4_rec_offset;
    ps_cur_memtab = &as_itt_memtab[i4_rec_offset];
    i4_module_recs = irc_rate_control_num_fill_use_free_memtab(
        (rate_control_api_t **) appv_handle[RC_MEM_API_L2], NULL, GET_NUM_MEMTAB);
    isvce_map_itt_mem_rec_to_rc_mem_rec(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_module_recs = irc_rate_control_num_fill_use_free_memtab(
        (rate_control_api_t **) appv_handle[RC_MEM_API_L2], ps_cur_memtab, e_func_type);
    isvce_map_rc_mem_recs_to_itt_api(ps_cur_mem, ps_cur_memtab, i4_module_recs);
    i4_rec_offset += i4_module_recs;

    /* Total memtabs needed by the rate control module */
    return i4_rec_offset;
}

View file

@ -0,0 +1,77 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file
* isvce_rc_mem_interface.h
*
* @brief
* This file contains function declaration and structures for rate control
* memtabs
*
* @author
* ittiam
*
* @remarks
* The rate control library is a global library across various codecs. It
* anticipates certain structures definitions. Those definitions are to be
* imported from global workspace. Instead of that, the structures needed for
* rc library are copied in to this file and exported to rc library. If the
* structures / enums / ... in the global workspace change, this file also needs
* to be modified accordingly.
*
******************************************************************************
*/
#ifndef _ISVCE_RC_MEM_INTERFACE_H_
#define _ISVCE_RC_MEM_INTERFACE_H_
#include "ih264e_rc_mem_interface.h"
/**
***************************************************************************
* Enum to hold mem records in RC. Each enumerator indexes one handle slot
* used while collecting the rate control mem records.
****************************************************************************
*/
typedef enum RC_MEM_TYPES_T
{
    /* Frame time module */
    RC_MEM_FRAME_TIME = 0,

    /* Time stamp module */
    RC_MEM_TIME_STAMP,

    /* Pull down frame rate module */
    RC_MEM_FRAME_RATE,

    /* Rate control api instance, slot 0 */
    RC_MEM_API_L0,

    /* Rate control api instance, slot 1 */
    RC_MEM_API_L1,

    /* Rate control api instance, slot 2 */
    RC_MEM_API_L2,

    /*
     * Number of slots. Must remain the last enumerator; do not add anything
     * below
     */
    RC_MEM_CNT
} RC_MEM_TYPES_T;
/**
******************************************************************************
*
* @brief Get memtabs for rate control
*
* @param[in] pv_rate_control
* pointer to rate control context (handle)
*
* @param[in,out] ps_mem
* pointer to encoder lib mem records
*
* @param[in] e_func_type
* enum that dictates fill memory records or Init memory records
*
* @return total number of mem records
*
******************************************************************************
*/
extern WORD32 isvce_get_rate_control_mem_tab(void *pv_rate_control, iv_mem_rec_t *ps_mem,
ITT_FUNC_TYPE_E e_func_type);
#endif

View file

@ -0,0 +1,286 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_rc_utils.c
*
* @brief
* Contains get gpp function required by the SVC encoder
*
* @author
* ittiam
*
* @par List of Functions:
* - isvce_get_gpp()
* - isvce_rc_utils_init()
* - isvce_get_rc_utils_data_size()
* - isvce_compute_gpp()
* - isvce_get_gpp_function_selector()
*
* @remarks
* None
*
*******************************************************************************
*/
#include "ih264_typedefs.h"
#include "ih264_macros.h"
#include "isvc_structs.h"
#include "isvce_rc_utils.h"
#include "isvce_rc_utils_private_defs.h"
/**
*******************************************************************************
*
* @brief
*  get gpp function
*
* @par Description:
*  computes gradient per pixel value for a given frame.
*
*  For component buffer 0, the absolute differences between each sample and
*  its bottom and right neighbours are summed over all rows but the last and
*  all columns but the last. Component buffer 1 is read as two byte-interleaved
*  planes (even bytes feed the U sum, odd bytes the V sum) with the same
*  bottom/right differences taken at a horizontal step of two bytes.
*
*  Each sum is divided by the sample count of its plane (width * height for
*  component 0, (width >> 1) * (height >> 1) for each half of component 1)
*  and the result is (4 * Y + U + V) / 6.
*
* @param[in] ps_input_buf
*  pointer to yuv buffer properties
*
* @returns
*  calculated gpp value; 0 when width or height is smaller than 2, since no
*  sample then has both a right and a bottom neighbour
*
* @remarks
*  Width and height are unsigned. Without the dimension guard, 'height - 1',
*  '(height >> 1) - 1' and 'width - 2' wrap to huge values for dimensions
*  below 2, which makes the loops read far outside the buffers, and the
*  normalising divisors can become zero.
*
*******************************************************************************
*/
static DOUBLE isvce_get_gpp(yuv_buf_props_t *ps_input_buf)
{
    UWORD32 i, j;
    DOUBLE d_gpp_y = 0;
    DOUBLE d_gpp_u = 0;
    DOUBLE d_gpp_v = 0;
    DOUBLE d_gpp = 0;
    UWORD32 u4_width = ps_input_buf->u4_width;
    UWORD32 u4_height = ps_input_buf->u4_height;
    UWORD8 *pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[0].pv_data;
    WORD32 i4_input_stride = ps_input_buf->as_component_bufs[0].i4_data_stride;

    /* Degenerate frames have no gradients; also keeps the unsigned loop */
    /* bounds and the divisors below well defined */
    if((u4_width < 2) || (u4_height < 2))
    {
        return d_gpp;
    }

    /* Component 0 */
    for(i = 0; i < u4_height - 1; i++)
    {
        for(j = 0; j < u4_width - 1; j++)
        {
            UWORD8 u1_cur_pix = pu1_input_buf[j];
            UWORD8 u1_bot_pix = pu1_input_buf[i4_input_stride + j];
            UWORD8 u1_right_pix = pu1_input_buf[j + 1];

            d_gpp_y += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix));
        }

        pu1_input_buf += i4_input_stride;
    }

    /* Component 1, two interleaved planes */
    pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[1].pv_data;
    i4_input_stride = ps_input_buf->as_component_bufs[1].i4_data_stride;

    for(i = 0; i < (u4_height >> 1) - 1; i++)
    {
        for(j = 0; j < u4_width - 2; j += 2)
        {
            /* even byte of the pair */
            UWORD8 u1_cur_pix = pu1_input_buf[j];
            UWORD8 u1_bot_pix = pu1_input_buf[i4_input_stride + j];
            UWORD8 u1_right_pix = pu1_input_buf[j + 2];

            d_gpp_u += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix));

            /* odd byte of the pair */
            u1_cur_pix = pu1_input_buf[j + 1];
            u1_bot_pix = pu1_input_buf[i4_input_stride + j + 1];
            u1_right_pix = pu1_input_buf[j + 2 + 1];

            d_gpp_v += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix));
        }

        pu1_input_buf += i4_input_stride;
    }

    d_gpp_y /= (u4_width * u4_height);
    d_gpp_u /= ((u4_width >> 1) * (u4_height >> 1));
    d_gpp_v /= ((u4_width >> 1) * (u4_height >> 1));

    d_gpp = (DOUBLE) ((4 * d_gpp_y) + d_gpp_u + d_gpp_v) / 6;

    return d_gpp;
}
/**
*******************************************************************************
*
* @brief
*  gets the memory size required for compute gpp
*
* @par Description:
*  returns the memory required by the rc utils state struct
*  (svc_rc_utils_state_t) for allocation.
*
* @returns
*  size of svc_rc_utils_state_t in bytes
*
* @remarks
*  The parameter list is spelled '(void)' so that the definition is a
*  prototype and calls with arguments are rejected by the compiler.
*
*******************************************************************************
*/
UWORD32 isvce_get_rc_utils_data_size(void) { return sizeof(svc_rc_utils_state_t); }
/**
*******************************************************************************
*
* @brief
*  compute gpp process
*
* @par Description:
*  forwards the input buffer to the gpp function stored in the rc utils state
*  that the context points to
*
* @param[in] ps_svc_rc_utils_ctxt
*  pointer to svc rc utils context
*
* @param[in] ps_input_buf
*  pointer to yuv buffer properties
*
* @returns
*  calculated gpp value
*
* @remarks
*  none
*
*******************************************************************************
*/
DOUBLE isvce_compute_gpp(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, yuv_buf_props_t *ps_input_buf)
{
    svc_rc_utils_state_t *ps_state = ps_svc_rc_utils_ctxt->pv_rc_utils_state;
    DOUBLE d_gpp = ps_state->pf_get_gpp(ps_input_buf);

    return d_gpp;
}
/**
*******************************************************************************
*
* @brief
*  selects which function to call for get gpp based on e_arch
*
* @par Description:
*  stores in the state the gpp function matching e_arch. Which architecture
*  values are recognised depends on the build: X86 builds recognise
*  ARCH_X86_SSE42, ARMV8 builds recognise the A53/A57/V8_NEON values, and
*  remaining builds without DISABLE_NEON recognise the A9Q/A9A/A9/A7/A5/A15
*  values. Every other value gets the C implementation isvce_get_gpp.
*
* @param[in] ps_rc_utils_state
*  pointer to svc rc utils state
*
* @param[in] e_arch
*  architecture type
*
* @returns none
*
* @remarks
*  NOTE(review): the last preprocessor branch is taken by any target that
*  defines none of X86, ARMV8 and DISABLE_NEON, so such a target must provide
*  isvce_get_gpp_neon - confirm this holds for every supported target.
*
*******************************************************************************
*/
static void isvce_get_gpp_function_selector(svc_rc_utils_state_t *ps_rc_utils_state,
                                            IV_ARCH_T e_arch)
{
    /* C implementation, unless one of the cases below overrides it */
    ps_rc_utils_state->pf_get_gpp = isvce_get_gpp;

    switch(e_arch)
    {
#if defined(X86)
        case ARCH_X86_SSE42:
            ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_sse42;
            break;
#elif defined(ARMV8)
        case ARCH_ARM_A53:
        case ARCH_ARM_A57:
        case ARCH_ARM_V8_NEON:
            ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_neon;
            break;
#elif !defined(DISABLE_NEON)
        case ARCH_ARM_A9Q:
        case ARCH_ARM_A9A:
        case ARCH_ARM_A9:
        case ARCH_ARM_A7:
        case ARCH_ARM_A5:
        case ARCH_ARM_A15:
            ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_neon;
            break;
#endif
        default:
            break;
    }
}
/**
*******************************************************************************
*
* @brief
*  initializes the rc utils context
*
* @par Description:
*  treats the base of the supplied memory record as the rc utils state, stores
*  that pointer in the context and selects the gpp function for e_arch
*
* @param[out] ps_svc_rc_utils_ctxt
*  pointer to svc rc utils context
*
* @param[in] ps_mem_rec
*  pointer to memory allocated to compute gpp process
*
* @param[in] e_arch
*  architecture type
*
* @returns none
*
* @remarks
*  none
*
*******************************************************************************
*/
void isvce_rc_utils_init(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, iv_mem_rec_t *ps_mem_rec,
                         IV_ARCH_T e_arch)
{
    /* The state struct lives at the very start of the memory record */
    svc_rc_utils_state_t *ps_state = (svc_rc_utils_state_t *) ps_mem_rec->pv_base;

    ps_svc_rc_utils_ctxt->pv_rc_utils_state = ps_state;

    isvce_get_gpp_function_selector(ps_state, e_arch);
}

View file

@ -0,0 +1,134 @@
/******************************************************************************
*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
* isvce_rc_utils.h
*
* @brief
* Contains get gpp function required by the SVC encoder
*
* @author
* ittiam
*
* @par List of Functions:
* - isvce_rc_utils_init()
* - isvce_get_rc_utils_data_size()
* - isvce_compute_gpp()
*
* @remarks
* None
*
*******************************************************************************
*/
#ifndef _ISVCE_RC_UTILS_H_
#define _ISVCE_RC_UTILS_H_
#include "ih264_typedefs.h"
#include "isvc_structs.h"
/**
* Context of the rc utils module. It only carries an untyped pointer to the
* module's state.
*/
typedef struct
{
/**
* pointer to the state of rc utils; assigned by isvce_rc_utils_init()
*/
void *pv_rc_utils_state;
} svc_rc_utils_ctxt_t;
/**
*******************************************************************************
*
* @brief
* initializes the rc utils context
*
* @par Description:
* uses the base of the supplied memory record as the rc utils state, stores
* it in the context and selects the gpp function for e_arch
*
* @param[out] ps_svc_rc_utils_ctxt
* pointer to svc rc utils context
*
* @param[in] ps_mem_rec
* pointer to memory allocated to compute gpp process
*
* @param[in] e_arch
* architecture type
*
* @returns none
*
* @remarks
* none
*
*******************************************************************************
*/
extern void isvce_rc_utils_init(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, iv_mem_rec_t *ps_mem_rec,
IV_ARCH_T e_arch);
/**
*******************************************************************************
*
* @brief
*  gets the memory size required for compute gpp
*
* @par Description:
*  returns the memory required by the rc utils state struct for allocation.
*
* @returns
*  required size in bytes
*
* @remarks
*  Declared with '(void)' so that this is a prototype: an empty parameter
*  list in a C declaration leaves the arguments unchecked.
*
*******************************************************************************
*/
extern UWORD32 isvce_get_rc_utils_data_size(void);
/**
*******************************************************************************
*
* @brief
* compute gpp process
*
* @par Description:
* calls the gpp function held in the rc utils state of the given context
*
* @param[in] ps_svc_rc_utils_ctxt
* pointer to svc rc utils context
*
* @param[in] ps_input_buf
* pointer to yuv buffer properties
*
* @returns
* calculated gpp value
*
* @remarks
* none
*
*******************************************************************************
*/
extern DOUBLE isvce_compute_gpp(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt,
yuv_buf_props_t *ps_input_buf);
#endif

Some files were not shown because too many files have changed in this diff Show more