mirror of
https://github.com/ittiam-systems/libavc.git
synced 2026-04-02 20:30:48 +07:00
Encoder: SVC encoding support added
Added support for encoding 'Scalable Baseline' profile, corresponding to profile_idc of 83 in 'Rec. ITU-T H.264 (11/2007)'. Bug: 248891908 Test: svcenc -c enc.cfg Change-Id: Ib12ca4c4a8c0e674738ae2af01558a08cefe0929
This commit is contained in:
parent
da77ac1a5f
commit
bb0f31cb6b
132 changed files with 85450 additions and 154 deletions
348
Android.bp
348
Android.bp
|
|
@ -35,6 +35,17 @@ cc_library_headers {
|
|||
min_sdk_version: "29",
|
||||
}
|
||||
|
||||
cc_library_headers {
|
||||
name: "libsvcenc_headers",
|
||||
export_include_dirs: [
|
||||
"common",
|
||||
"common/svc",
|
||||
"encoder",
|
||||
"encoder/svc"
|
||||
],
|
||||
min_sdk_version: "29",
|
||||
}
|
||||
|
||||
cc_library_headers {
|
||||
name: "libavcenc_headers",
|
||||
export_include_dirs: [
|
||||
|
|
@ -44,6 +55,106 @@ cc_library_headers {
|
|||
min_sdk_version: "29",
|
||||
}
|
||||
|
||||
cc_defaults {
|
||||
name: "libavc_enc_defaults",
|
||||
vendor_available: true,
|
||||
host_supported: true,
|
||||
shared_libs: [
|
||||
"liblog",
|
||||
"libcutils",
|
||||
],
|
||||
cflags: [
|
||||
"-DNDEBUG",
|
||||
"-UHP_PL",
|
||||
"-DN_MB_ENABLE",
|
||||
"-fPIC",
|
||||
"-O3",
|
||||
"-Wall",
|
||||
"-Werror",
|
||||
"-Wno-error=constant-conversion",
|
||||
],
|
||||
arch: {
|
||||
arm: {
|
||||
local_include_dirs: [
|
||||
"common/arm",
|
||||
"encoder/arm",
|
||||
],
|
||||
|
||||
cflags: [
|
||||
"-DARM",
|
||||
// These will be overridden by armv7_a_neon
|
||||
"-DDISABLE_NEON",
|
||||
],
|
||||
|
||||
neon: {
|
||||
cflags: [
|
||||
"-UDISABLE_NEON",
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
arm64: {
|
||||
cflags: [
|
||||
"-DARMV8",
|
||||
"-DARM",
|
||||
],
|
||||
local_include_dirs: [
|
||||
"common/arm",
|
||||
"common/armv8",
|
||||
"encoder/arm",
|
||||
"encoder/armv8",
|
||||
],
|
||||
},
|
||||
|
||||
riscv64: {
|
||||
local_include_dirs: [
|
||||
"common/riscv",
|
||||
"encoder/riscv",
|
||||
],
|
||||
},
|
||||
|
||||
x86: {
|
||||
cflags: [
|
||||
"-DX86",
|
||||
"-msse4.2",
|
||||
],
|
||||
|
||||
local_include_dirs: [
|
||||
"encoder/x86",
|
||||
"common/x86",
|
||||
],
|
||||
},
|
||||
|
||||
x86_64: {
|
||||
cflags: [
|
||||
"-DX86",
|
||||
"-msse4.2",
|
||||
],
|
||||
|
||||
local_include_dirs: [
|
||||
"encoder/x86",
|
||||
"common/x86",
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
sanitize: {
|
||||
integer_overflow: true,
|
||||
misc_undefined: ["bounds"],
|
||||
cfi: true,
|
||||
config: {
|
||||
cfi_assembly_support: true,
|
||||
},
|
||||
blocklist: "libavc_blocklist.txt",
|
||||
},
|
||||
|
||||
apex_available: [
|
||||
"//apex_available:platform", //due to libstagefright_soft_avcenc
|
||||
"com.android.media.swcodec",
|
||||
],
|
||||
min_sdk_version: "29",
|
||||
}
|
||||
|
||||
cc_defaults {
|
||||
name: "libavc_mvc_dec_defaults",
|
||||
cflags: [
|
||||
|
|
@ -349,24 +460,7 @@ cc_library_static {
|
|||
|
||||
cc_library_static {
|
||||
name: "libavcenc",
|
||||
vendor_available: true,
|
||||
host_supported: true,
|
||||
shared_libs: [
|
||||
"liblog",
|
||||
"libcutils",
|
||||
],
|
||||
|
||||
cflags: [
|
||||
"-DNDEBUG",
|
||||
"-UHP_PL",
|
||||
"-DN_MB_ENABLE",
|
||||
"-fPIC",
|
||||
|
||||
"-O3",
|
||||
"-Wall",
|
||||
"-Werror",
|
||||
"-Wno-error=constant-conversion",
|
||||
],
|
||||
defaults: ["libavc_enc_defaults"],
|
||||
|
||||
export_include_dirs: [
|
||||
"common",
|
||||
|
|
@ -435,23 +529,11 @@ cc_library_static {
|
|||
|
||||
arch: {
|
||||
arm: {
|
||||
local_include_dirs: [
|
||||
"encoder/arm",
|
||||
"common/arm",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"encoder/arm/ih264e_function_selector.c",
|
||||
"common/arm/ih264_arm_memory_barrier.s",
|
||||
],
|
||||
|
||||
cflags: [
|
||||
"-DARM",
|
||||
|
||||
// This will be overridden by armv7_a_neon
|
||||
"-DDISABLE_NEON",
|
||||
],
|
||||
|
||||
neon: {
|
||||
srcs: [
|
||||
"encoder/arm/ih264e_function_selector_a9q.c",
|
||||
|
|
@ -479,25 +561,10 @@ cc_library_static {
|
|||
"encoder/arm/ih264e_fmt_conv.s",
|
||||
"encoder/arm/ime_distortion_metrics_a9q.s",
|
||||
],
|
||||
|
||||
cflags: [
|
||||
"-UDISABLE_NEON",
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
arm64: {
|
||||
cflags: [
|
||||
"-DARMV8",
|
||||
"-DARM",
|
||||
],
|
||||
|
||||
local_include_dirs: [
|
||||
"encoder/arm",
|
||||
"encoder/armv8",
|
||||
"common/armv8",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"encoder/arm/ih264e_function_selector.c",
|
||||
"encoder/arm/ih264e_function_selector_av8.c",
|
||||
|
|
@ -525,27 +592,12 @@ cc_library_static {
|
|||
},
|
||||
|
||||
riscv64: {
|
||||
local_include_dirs: [
|
||||
"common/riscv",
|
||||
"encoder/riscv",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"encoder/riscv/ih264e_function_selector.c",
|
||||
],
|
||||
},
|
||||
|
||||
x86: {
|
||||
cflags: [
|
||||
"-DX86",
|
||||
"-msse4.2",
|
||||
],
|
||||
|
||||
local_include_dirs: [
|
||||
"encoder/x86",
|
||||
"common/x86",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"encoder/x86/ih264e_function_selector.c",
|
||||
"encoder/x86/ih264e_function_selector_sse42.c",
|
||||
|
|
@ -571,16 +623,6 @@ cc_library_static {
|
|||
},
|
||||
|
||||
x86_64: {
|
||||
cflags: [
|
||||
"-DX86",
|
||||
"-msse4.2",
|
||||
],
|
||||
|
||||
local_include_dirs: [
|
||||
"encoder/x86",
|
||||
"common/x86",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"encoder/x86/ih264e_function_selector.c",
|
||||
"encoder/x86/ih264e_function_selector_sse42.c",
|
||||
|
|
@ -605,21 +647,161 @@ cc_library_static {
|
|||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
sanitize: {
|
||||
integer_overflow: true,
|
||||
misc_undefined: ["bounds"],
|
||||
cfi: true,
|
||||
config: {
|
||||
cfi_assembly_support: true,
|
||||
},
|
||||
blocklist: "libavc_blocklist.txt",
|
||||
},
|
||||
apex_available: [
|
||||
"//apex_available:platform", //due to libstagefright_soft_avcenc
|
||||
"com.android.media.swcodec",
|
||||
cc_library_static {
|
||||
name: "libsvcenc",
|
||||
defaults: ["libavc_enc_defaults"],
|
||||
whole_static_libs: [
|
||||
"libavcenc",
|
||||
],
|
||||
min_sdk_version: "29",
|
||||
|
||||
export_include_dirs: [
|
||||
"common",
|
||||
"common/svc",
|
||||
"encoder",
|
||||
"encoder/svc",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"common/svc/isvc_cabac_tables.c",
|
||||
"common/svc/isvc_common_tables.c",
|
||||
"common/svc/isvc_intra_resample.c",
|
||||
"common/svc/isvc_iquant_itrans_recon.c",
|
||||
"common/svc/isvc_mem_fns.c",
|
||||
"common/svc/isvc_resi_trans_quant.c",
|
||||
"encoder/svc/irc_svc_rate_control_api.c",
|
||||
"encoder/svc/isvce_api.c",
|
||||
"encoder/svc/isvce_cabac.c",
|
||||
"encoder/svc/isvce_cabac_encode.c",
|
||||
"encoder/svc/isvce_cabac_init.c",
|
||||
"encoder/svc/isvce_cavlc.c",
|
||||
"encoder/svc/isvce_core_coding.c",
|
||||
"encoder/svc/isvce_deblk.c",
|
||||
"encoder/svc/isvce_downscaler.c",
|
||||
"encoder/svc/isvce_encode.c",
|
||||
"encoder/svc/isvce_encode_header.c",
|
||||
"encoder/svc/isvce_fmt_conv.c",
|
||||
"encoder/svc/isvce_function_selector_generic.c",
|
||||
"encoder/svc/isvce_globals.c",
|
||||
"encoder/svc/isvce_ibl_eval.c",
|
||||
"encoder/svc/isvce_ilp_mv.c",
|
||||
"encoder/svc/isvce_intra_modes_eval.c",
|
||||
"encoder/svc/isvce_mc.c",
|
||||
"encoder/svc/isvce_me.c",
|
||||
"encoder/svc/isvce_mode_stat_visualiser.c",
|
||||
"encoder/svc/isvce_nalu_stat_aggregator.c",
|
||||
"encoder/svc/isvce_process.c",
|
||||
"encoder/svc/isvce_rate_control.c",
|
||||
"encoder/svc/isvce_rc_mem_interface.c",
|
||||
"encoder/svc/isvce_rc_utils.c",
|
||||
"encoder/svc/isvce_residual_pred.c",
|
||||
"encoder/svc/isvce_sub_pic_rc.c",
|
||||
"encoder/svc/isvce_utils.c",
|
||||
],
|
||||
|
||||
arch: {
|
||||
arm: {
|
||||
local_include_dirs: [
|
||||
"common/arm/svc",
|
||||
"encoder/arm/svc",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"encoder/arm/svc/isvce_function_selector.c",
|
||||
],
|
||||
|
||||
neon: {
|
||||
srcs: [
|
||||
"encoder/arm/svc/isvce_function_selector_a9q.c",
|
||||
"common/arm/svc/isvc_intra_sampling_neon.c",
|
||||
"common/arm/svc/isvc_iquant_itrans_recon_neon.c",
|
||||
"common/arm/svc/isvc_mem_fns_neon.c",
|
||||
"common/arm/svc/isvc_resi_trans_quant_neon.c",
|
||||
"encoder/arm/svc/isvce_downscaler_neon.c",
|
||||
"encoder/arm/svc/isvce_rc_utils_neon.c",
|
||||
"encoder/arm/svc/isvce_residual_pred_neon.c",
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
arm64: {
|
||||
local_include_dirs: [
|
||||
"common/arm/svc",
|
||||
"encoder/arm/svc",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"encoder/arm/svc/isvce_function_selector.c",
|
||||
"encoder/arm/svc/isvce_function_selector_av8.c",
|
||||
"common/arm/svc/isvc_intra_sampling_neon.c",
|
||||
"common/arm/svc/isvc_iquant_itrans_recon_neon.c",
|
||||
"common/arm/svc/isvc_mem_fns_neon.c",
|
||||
"common/arm/svc/isvc_resi_trans_quant_neon.c",
|
||||
"encoder/arm/svc/isvce_downscaler_neon.c",
|
||||
"encoder/arm/svc/isvce_rc_utils_neon.c",
|
||||
"encoder/arm/svc/isvce_residual_pred_neon.c",
|
||||
],
|
||||
},
|
||||
|
||||
riscv64: {
|
||||
local_include_dirs: [
|
||||
"encoder/riscv/svc",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"encoder/riscv/svc/isvce_function_selector.c",
|
||||
],
|
||||
},
|
||||
|
||||
x86: {
|
||||
local_include_dirs: [
|
||||
"encoder/x86/svc",
|
||||
"common/x86/svc",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"common/x86/svc/isvc_intra_resample_sse42.c",
|
||||
"common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c",
|
||||
"common/x86/svc/isvc_iquant_itrans_recon_sse42.c",
|
||||
"common/x86/svc/isvc_iquant_itrans_recon_ssse3.c",
|
||||
"common/x86/svc/isvc_mem_fns_sse42.c",
|
||||
"common/x86/svc/isvc_mem_fns_ssse3.c",
|
||||
"common/x86/svc/isvc_padding_ssse3.c",
|
||||
"common/x86/svc/isvc_resi_trans_quant_sse42.c",
|
||||
"encoder/x86/svc/isvce_downscaler_sse42.c",
|
||||
"encoder/x86/svc/isvce_function_selector.c",
|
||||
"encoder/x86/svc/isvce_function_selector_sse42.c",
|
||||
"encoder/x86/svc/isvce_function_selector_ssse3.c",
|
||||
"encoder/x86/svc/isvce_rc_utils_sse42.c",
|
||||
"encoder/x86/svc/isvce_residual_pred_sse42.c",
|
||||
],
|
||||
},
|
||||
|
||||
x86_64: {
|
||||
local_include_dirs: [
|
||||
"encoder/x86/svc",
|
||||
"common/x86/svc",
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"common/x86/svc/isvc_intra_resample_sse42.c",
|
||||
"common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c",
|
||||
"common/x86/svc/isvc_iquant_itrans_recon_sse42.c",
|
||||
"common/x86/svc/isvc_iquant_itrans_recon_ssse3.c",
|
||||
"common/x86/svc/isvc_mem_fns_sse42.c",
|
||||
"common/x86/svc/isvc_mem_fns_ssse3.c",
|
||||
"common/x86/svc/isvc_padding_ssse3.c",
|
||||
"common/x86/svc/isvc_resi_trans_quant_sse42.c",
|
||||
"encoder/x86/svc/isvce_downscaler_sse42.c",
|
||||
"encoder/x86/svc/isvce_function_selector.c",
|
||||
"encoder/x86/svc/isvce_function_selector_sse42.c",
|
||||
"encoder/x86/svc/isvce_function_selector_ssse3.c",
|
||||
"encoder/x86/svc/isvce_rc_utils_sse42.c",
|
||||
"encoder/x86/svc/isvce_residual_pred_sse42.c",
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
subdirs = ["test"]
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ enable_language(ASM)
|
|||
|
||||
set(AVC_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
set(AVC_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
option(ENABLE_MVC "Enables mvcdec builds" OFF)
|
||||
option(ENABLE_SVC "Enables svcenc and svcdec builds" OFF)
|
||||
|
||||
if("${AVC_ROOT}" STREQUAL "${AVC_CONFIG_DIR}")
|
||||
message(
|
||||
|
|
@ -36,13 +38,29 @@ libavc_set_link_libraries()
|
|||
|
||||
include("${AVC_ROOT}/common/common.cmake")
|
||||
include("${AVC_ROOT}/decoder/libavcdec.cmake")
|
||||
include("${AVC_ROOT}/decoder/mvc/libmvcdec.cmake")
|
||||
if (${ENABLE_MVC})
|
||||
include("${AVC_ROOT}/decoder/mvc/libmvcdec.cmake")
|
||||
endif()
|
||||
include("${AVC_ROOT}/encoder/libavcenc.cmake")
|
||||
if (${ENABLE_SVC})
|
||||
include("${AVC_ROOT}/common/svccommon.cmake")
|
||||
include("${AVC_ROOT}/encoder/svc/libsvcenc.cmake")
|
||||
endif()
|
||||
|
||||
include("${AVC_ROOT}/test/decoder/avcdec.cmake")
|
||||
include("${AVC_ROOT}/test/mvcdec/mvcdec.cmake")
|
||||
if (${ENABLE_MVC})
|
||||
include("${AVC_ROOT}/test/mvcdec/mvcdec.cmake")
|
||||
endif()
|
||||
include("${AVC_ROOT}/test/encoder/avcenc.cmake")
|
||||
if (${ENABLE_SVC})
|
||||
include("${AVC_ROOT}/test/svcenc/svcenc.cmake")
|
||||
endif()
|
||||
|
||||
include("${AVC_ROOT}/fuzzer/avc_dec_fuzzer.cmake")
|
||||
include("${AVC_ROOT}/fuzzer/mvc_dec_fuzzer.cmake")
|
||||
if (${ENABLE_MVC})
|
||||
include("${AVC_ROOT}/fuzzer/mvc_dec_fuzzer.cmake")
|
||||
endif()
|
||||
include("${AVC_ROOT}/fuzzer/avc_enc_fuzzer.cmake")
|
||||
if (${ENABLE_SVC})
|
||||
include("${AVC_ROOT}/fuzzer/svc_enc_fuzzer.cmake")
|
||||
endif()
|
||||
|
|
|
|||
485
common/arm/svc/isvc_intra_sampling_neon.c
Normal file
485
common/arm/svc/isvc_intra_sampling_neon.c
Normal file
|
|
@ -0,0 +1,485 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
* *******************************************************************************
|
||||
* * @file
|
||||
* isvc_intra_sampling_neon.c
|
||||
*
|
||||
* @brief
|
||||
* neon variants of intra sampling functions used by IBL mode
|
||||
*
|
||||
* *******************************************************************************
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_intra_resample.h"
|
||||
|
||||
/**
 * Weighted sum of four rows of 8 pixels. Every row is widened from 8 to 16 bit
 * and scaled by its own tap; the accumulation stays in 16-bit lanes.
 */
static inline int16x8_t isvc_luma_vert_tap4_x8_neon(uint8x8_t row_a, uint8x8_t row_b,
                                                    uint8x8_t row_c, uint8x8_t row_d,
                                                    WORD16 i2_tap_a, WORD16 i2_tap_b,
                                                    WORD16 i2_tap_c, WORD16 i2_tap_d)
{
    int16x8_t acc = vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(row_a)), i2_tap_a);

    acc = vmlaq_n_s16(acc, vreinterpretq_s16_u16(vmovl_u8(row_b)), i2_tap_b);
    acc = vmlaq_n_s16(acc, vreinterpretq_s16_u16(vmovl_u8(row_c)), i2_tap_c);
    acc = vmlaq_n_s16(acc, vreinterpretq_s16_u16(vmovl_u8(row_d)), i2_tap_d);

    return acc;
}

/**
 * Same as isvc_luma_vert_tap4_x8_neon, but only the first 4 pixels of every
 * 8-pixel row take part in the sum.
 */
static inline int16x4_t isvc_luma_vert_tap4_x4_neon(uint8x8_t row_a, uint8x8_t row_b,
                                                    uint8x8_t row_c, uint8x8_t row_d,
                                                    WORD16 i2_tap_a, WORD16 i2_tap_b,
                                                    WORD16 i2_tap_c, WORD16 i2_tap_d)
{
    int16x4_t acc =
        vmul_n_s16(vreinterpret_s16_u16(vget_low_u16(vmovl_u8(row_a))), i2_tap_a);

    acc = vmla_n_s16(acc, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(row_b))), i2_tap_b);
    acc = vmla_n_s16(acc, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(row_c))), i2_tap_c);
    acc = vmla_n_s16(acc, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(row_d))), i2_tap_d);

    return acc;
}

/**
 * Weighted sum of four vectors of 16-bit intermediates, widened to 32 bit.
 */
static inline int32x4_t isvc_luma_horz_tap4_neon(int16x4_t samp_a, int16x4_t samp_b,
                                                 int16x4_t samp_c, int16x4_t samp_d,
                                                 WORD16 i2_tap_a, WORD16 i2_tap_b,
                                                 WORD16 i2_tap_c, WORD16 i2_tap_d)
{
    int32x4_t acc = vmull_n_s16(samp_a, i2_tap_a);

    acc = vmlal_n_s16(acc, samp_b, i2_tap_b);
    acc = vmlal_n_s16(acc, samp_c, i2_tap_c);
    acc = vmlal_n_s16(acc, samp_d, i2_tap_d);

    return acc;
}

/**
 * @brief
 *  neon variant of the dyadic luma interpolation: a vertical 4-tap pass into a
 *  16-bit scratch buffer followed by a horizontal 4-tap pass
 *
 * @param[in] pu1_inp_buf
 *  input samples; 12 rows are read, DYADIC_REF_W_Y bytes apart
 *
 * @param[out] pi2_tmp_filt_buf
 *  scratch for the vertical pass; 16 rows of 12 values, 12 values apart
 *
 * @param[out] pu1_out_buf
 *  output samples; 16 rows of 16 bytes are written
 *
 * @param[in] i4_out_stride
 *  distance in bytes between two output rows
 */
void isvc_interpolate_base_luma_dyadic_neon(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
                                            UWORD8 *pu1_out_buf, WORD32 i4_out_stride)
{
    /* Filter coefficient values for phase 4. Half of the outputs use the taps */
    /* in the order 0..3, the other half in the reverse order 3..0 */
    const WORD16 i2_tap_0 = -3;
    const WORD16 i2_tap_1 = 28;
    const WORD16 i2_tap_2 = 8;
    const WORD16 i2_tap_3 = -1;

    const WORD32 i4_tmp_stride = 12;
    const WORD32 i4_ref_stride = DYADIC_REF_W_Y;
    const int32x4_t i4_round_32x4 = vdupq_n_s32(512);

    const UWORD8 *pu1_ref;
    WORD16 *pi2_filt;
    UWORD8 *pu1_dst;
    uint8x8_t row_0, row_1, row_2, row_3;
    WORD32 i4_row, i4_col;

    /* ---------------- Vertical pass : columns 0 to 7 ---------------- */
    pu1_ref = pu1_inp_buf;
    pi2_filt = pi2_tmp_filt_buf;

    row_0 = vld1_u8(pu1_ref);
    row_1 = vld1_u8(pu1_ref + i4_ref_stride);
    row_2 = vld1_u8(pu1_ref + 2 * i4_ref_stride);
    row_3 = vld1_u8(pu1_ref + 3 * i4_ref_stride);
    pu1_ref += 4 * i4_ref_stride;

    /* first scratch row : reversed taps on input rows 0..3 */
    vst1q_s16(pi2_filt, isvc_luma_vert_tap4_x8_neon(row_0, row_1, row_2, row_3, i2_tap_3,
                                                    i2_tap_2, i2_tap_1, i2_tap_0));
    pi2_filt += i4_tmp_stride;

    /* scratch rows 1..14 : every new input row yields two scratch rows */
    for(i4_row = 1; i4_row < 15; i4_row += 2)
    {
        int16x8_t fwd_16x8, rev_16x8;

        row_0 = row_1;
        row_1 = row_2;
        row_2 = row_3;
        row_3 = vld1_u8(pu1_ref);
        pu1_ref += i4_ref_stride;

        fwd_16x8 = isvc_luma_vert_tap4_x8_neon(row_0, row_1, row_2, row_3, i2_tap_0, i2_tap_1,
                                               i2_tap_2, i2_tap_3);
        rev_16x8 = isvc_luma_vert_tap4_x8_neon(row_0, row_1, row_2, row_3, i2_tap_3, i2_tap_2,
                                               i2_tap_1, i2_tap_0);

        vst1q_s16(pi2_filt, fwd_16x8);
        vst1q_s16(pi2_filt + i4_tmp_stride, rev_16x8);
        pi2_filt += 2 * i4_tmp_stride;
    }

    /* last scratch row (y = 15) : taps in forward order */
    row_0 = row_1;
    row_1 = row_2;
    row_2 = row_3;
    row_3 = vld1_u8(pu1_ref);

    vst1q_s16(pi2_filt, isvc_luma_vert_tap4_x8_neon(row_0, row_1, row_2, row_3, i2_tap_0,
                                                    i2_tap_1, i2_tap_2, i2_tap_3));

    /* ---------------- Vertical pass : columns 8 to 11 ---------------- */
    /* 8 bytes are loaded per row, but only the first 4 are filtered and stored */
    pu1_ref = pu1_inp_buf + 8;
    pi2_filt = pi2_tmp_filt_buf + 8;

    row_0 = vld1_u8(pu1_ref);
    row_1 = vld1_u8(pu1_ref + i4_ref_stride);
    row_2 = vld1_u8(pu1_ref + 2 * i4_ref_stride);
    row_3 = vld1_u8(pu1_ref + 3 * i4_ref_stride);
    pu1_ref += 4 * i4_ref_stride;

    vst1_s16(pi2_filt, isvc_luma_vert_tap4_x4_neon(row_0, row_1, row_2, row_3, i2_tap_3,
                                                   i2_tap_2, i2_tap_1, i2_tap_0));
    pi2_filt += i4_tmp_stride;

    for(i4_row = 1; i4_row < 15; i4_row += 2)
    {
        int16x4_t fwd_16x4, rev_16x4;

        row_0 = row_1;
        row_1 = row_2;
        row_2 = row_3;
        row_3 = vld1_u8(pu1_ref);
        pu1_ref += i4_ref_stride;

        fwd_16x4 = isvc_luma_vert_tap4_x4_neon(row_0, row_1, row_2, row_3, i2_tap_0, i2_tap_1,
                                               i2_tap_2, i2_tap_3);
        rev_16x4 = isvc_luma_vert_tap4_x4_neon(row_0, row_1, row_2, row_3, i2_tap_3, i2_tap_2,
                                               i2_tap_1, i2_tap_0);

        vst1_s16(pi2_filt, fwd_16x4);
        vst1_s16(pi2_filt + i4_tmp_stride, rev_16x4);
        pi2_filt += 2 * i4_tmp_stride;
    }

    row_0 = row_1;
    row_1 = row_2;
    row_2 = row_3;
    row_3 = vld1_u8(pu1_ref);

    vst1_s16(pi2_filt, isvc_luma_vert_tap4_x4_neon(row_0, row_1, row_2, row_3, i2_tap_0,
                                                   i2_tap_1, i2_tap_2, i2_tap_3));

    /* ---------------- Horizontal pass ---------------- */
    pi2_filt = pi2_tmp_filt_buf;
    pu1_dst = pu1_out_buf;

    for(i4_row = 0; i4_row < 16; i4_row++)
    {
        /* win[k] holds the scratch values k, k + 1, k + 2, k + 3 of this row */
        int16x4_t win[9];
        int32x4_t even_lo, odd_lo, even_hi, odd_hi;
        int32x4x2_t zip_lo, zip_hi;
        uint16x8_t pix_lo, pix_hi;

        for(i4_col = 0; i4_col < 9; i4_col++)
        {
            win[i4_col] = vld1_s16(pi2_filt + i4_col);
        }

        /* results for output positions 0, 2, 4, 6 */
        even_lo = isvc_luma_horz_tap4_neon(win[0], win[1], win[2], win[3], i2_tap_3, i2_tap_2,
                                           i2_tap_1, i2_tap_0);
        /* results for output positions 1, 3, 5, 7 */
        odd_lo = isvc_luma_horz_tap4_neon(win[1], win[2], win[3], win[4], i2_tap_0, i2_tap_1,
                                          i2_tap_2, i2_tap_3);
        /* results for output positions 8, 10, 12, 14 */
        even_hi = isvc_luma_horz_tap4_neon(win[4], win[5], win[6], win[7], i2_tap_3, i2_tap_2,
                                           i2_tap_1, i2_tap_0);
        /* results for output positions 9, 11, 13, 15 */
        odd_hi = isvc_luma_horz_tap4_neon(win[5], win[6], win[7], win[8], i2_tap_0, i2_tap_1,
                                          i2_tap_2, i2_tap_3);

        /* interleave even and odd results back into raster order */
        zip_lo = vzipq_s32(even_lo, odd_lo);
        zip_hi = vzipq_s32(even_hi, odd_hi);

        /* add 512, shift right by 10 and saturate to unsigned 16 bit */
        pix_lo = vcombine_u16(vqshrun_n_s32(vaddq_s32(zip_lo.val[0], i4_round_32x4), 10),
                              vqshrun_n_s32(vaddq_s32(zip_lo.val[1], i4_round_32x4), 10));
        pix_hi = vcombine_u16(vqshrun_n_s32(vaddq_s32(zip_hi.val[0], i4_round_32x4), 10),
                              vqshrun_n_s32(vaddq_s32(zip_hi.val[1], i4_round_32x4), 10));

        /* saturate to 8 bit and write the 16 output pixels of this row */
        vst1_u8(pu1_dst, vqmovn_u16(pix_lo));
        vst1_u8(pu1_dst + 8, vqmovn_u16(pix_hi));

        pu1_dst += i4_out_stride;
        pi2_filt += i4_tmp_stride;
    }
}
|
||||
|
||||
/**
 * Filters one row of 16-bit intermediates with two 2-tap filters.
 * Lane k of the first filter is row[k] * wt_a0 + row[k + 1] * wt_a1, lane k of
 * the second one is row[k + 1] * wt_b0 + row[k + 2] * wt_b1. The first four
 * lanes of both are interleaved (a0 b0 a1 b1 ...) and shifted right by 8 with
 * rounding.
 *
 * NOTE(review): each load fetches 8 values although only lanes 0..3 reach the
 * result, so reads extend past a 6-value row - confirm the buffer has room.
 */
static inline int16x8_t isvc_chroma_horz_row_neon(const WORD16 *pi2_row, WORD32 i4_wt_a0,
                                                  WORD32 i4_wt_a1, WORD32 i4_wt_b0,
                                                  WORD32 i4_wt_b1)
{
    int16x8_t samp_0 = vld1q_s16(pi2_row);
    int16x8_t samp_1 = vld1q_s16(pi2_row + 1);
    int16x8_t samp_2 = vld1q_s16(pi2_row + 2);
    int16x8_t filt_a = vmulq_n_s16(samp_0, i4_wt_a0);
    int16x8_t filt_b = vmulq_n_s16(samp_1, i4_wt_b0);
    int16x8x2_t zipped;

    filt_a = vmlaq_n_s16(filt_a, samp_1, i4_wt_a1);
    filt_b = vmlaq_n_s16(filt_b, samp_2, i4_wt_b1);

    zipped = vzipq_s16(filt_a, filt_b);

    return vrshrq_n_s16(zipped.val[0], 8);
}

/**
 * @brief
 *  neon variant of the horizontal pass of the dyadic chroma interpolation
 *
 * @param[in] pi2_tmp_filt_buf
 *  16-bit intermediates; 8 rows are processed, 6 values apart
 *
 * @param[in,out] pu1_out_buf
 *  output buffer; 16 bytes per row are read and written back, but only the
 *  low byte of every 16-bit lane is replaced. The other byte is preserved
 *  (presumably the other plane of an interleaved chroma buffer - confirm
 *  against the caller)
 *
 * @param[in] i4_out_stride
 *  distance in bytes between two output rows
 *
 * @param[in] i4_phase_0
 *  phase of the first filter; its weights are (16 - phase) and phase
 *
 * @param[in] i4_phase_1
 *  phase of the second filter; its weights are (16 - phase) and phase
 */
void isvc_horz_interpol_chroma_dyadic_neon(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf,
                                           WORD32 i4_out_stride, WORD32 i4_phase_0,
                                           WORD32 i4_phase_1)
{
    const WORD32 i4_tmp_stride = 6;
    const WORD32 i4_wt_a0 = 16 - i4_phase_0;
    const WORD32 i4_wt_a1 = i4_phase_0;
    const WORD32 i4_wt_b0 = 16 - i4_phase_1;
    const WORD32 i4_wt_b1 = i4_phase_1;
    /* selects the low byte of every 16-bit lane */
    const uint8x16_t low_byte_mask_8x16 = vreinterpretq_u8_u16(vdupq_n_u16(0x00ff));

    WORD16 *pi2_row = pi2_tmp_filt_buf;
    UWORD8 *pu1_dst = pu1_out_buf;
    WORD32 i4_pair;

    /* 8 rows, handled as 4 pairs of consecutive rows */
    for(i4_pair = 0; i4_pair < 4; i4_pair++)
    {
        UWORD8 *pu1_dst_next = pu1_dst + i4_out_stride;

        int16x8_t filt_top = isvc_chroma_horz_row_neon(pi2_row, i4_wt_a0, i4_wt_a1, i4_wt_b0,
                                                       i4_wt_b1);
        int16x8_t filt_bot = isvc_chroma_horz_row_neon(pi2_row + i4_tmp_stride, i4_wt_a0,
                                                       i4_wt_a1, i4_wt_b0, i4_wt_b1);

        /* both destination rows are fetched before either one is written */
        uint8x16_t dst_top = vld1q_u8(pu1_dst);
        uint8x16_t dst_bot = vld1q_u8(pu1_dst_next);

        dst_top = vbslq_u8(low_byte_mask_8x16, vreinterpretq_u8_s16(filt_top), dst_top);
        dst_bot = vbslq_u8(low_byte_mask_8x16, vreinterpretq_u8_s16(filt_bot), dst_bot);

        vst1q_u8(pu1_dst, dst_top);
        vst1q_u8(pu1_dst_next, dst_bot);

        /* advance by two rows */
        pi2_row += (i4_tmp_stride << 1);
        pu1_dst += (i4_out_stride << 1);
    }
}
|
||||
|
||||
/**
 * Vertical pass of the dyadic chroma interpolation (NEON).
 *
 * Reads six rows of eight bytes from pu1_inp_buf (row pitch DYADIC_REF_W_C)
 * and produces eight rows of 16-bit results in pi2_tmp_filt_buf, with
 * consecutive output rows placed six elements apart.
 *
 * Output row r blends input rows ((r + 1) >> 1) and ((r + 1) >> 1) + 1.
 * Even rows use the weights (16 - i4_phase_0, i4_phase_0), odd rows use
 * (16 - i4_phase_1, i4_phase_1).
 *
 * @param pu1_inp_buf       first byte of the 6-row input window
 * @param pi2_tmp_filt_buf  destination for the 48 intermediate values
 * @param i4_phase_0        weight of the lower row for even output rows
 * @param i4_phase_1        weight of the lower row for odd output rows
 */
void isvc_vert_interpol_chroma_dyadic_neon(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
                                           WORD32 i4_phase_0, WORD32 i4_phase_1)
{
    /* Index 0 holds the weights of even output rows, index 1 those of odd rows */
    const WORD32 ai4_wt_upper[2] = {16 - i4_phase_0, 16 - i4_phase_1};
    const WORD32 ai4_wt_lower[2] = {i4_phase_0, i4_phase_1};

    const UWORD8 *pu1_src_row = pu1_inp_buf;
    WORD16 *pi2_dst_row = pi2_tmp_filt_buf;

    int16x8_t as_src_rows[6];
    int16x8_t s_blend;
    WORD32 i4_row;

    /* Load the six input rows and widen them to 16 bit once */
    for(i4_row = 0; i4_row < 6; i4_row++)
    {
        as_src_rows[i4_row] = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pu1_src_row)));
        pu1_src_row += DYADIC_REF_W_C;
    }

    for(i4_row = 0; i4_row < 8; i4_row++)
    {
        const WORD32 i4_upper = (i4_row + 1) >> 1;
        const WORD32 i4_parity = i4_row & 1;

        s_blend = vmulq_n_s16(as_src_rows[i4_upper], ai4_wt_upper[i4_parity]);
        s_blend = vmlaq_n_s16(s_blend, as_src_rows[i4_upper + 1], ai4_wt_lower[i4_parity]);

        if(i4_row < 7)
        {
            /* Full 8-lane store; lanes 6 and 7 are overwritten by the next row's store */
            vst1q_s16(pi2_dst_row, s_blend);
        }
        else
        {
            /* Last row: write exactly six lanes so nothing lands past element 47 */
            vst1_s16(pi2_dst_row, vget_low_s16(s_blend));
            vst1q_lane_s16(pi2_dst_row + 4, s_blend, 4);
            vst1q_lane_s16(pi2_dst_row + 5, s_blend, 5);
        }

        pi2_dst_row += 6;
    }
}
|
||||
1783
common/arm/svc/isvc_iquant_itrans_recon_neon.c
Normal file
1783
common/arm/svc/isvc_iquant_itrans_recon_neon.c
Normal file
File diff suppressed because it is too large
Load diff
151
common/arm/svc/isvc_mem_fns_neon.c
Normal file
151
common/arm/svc/isvc_mem_fns_neon.c
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
* *******************************************************************************
|
||||
* * @file
|
||||
* isvc_mem_fns_neon.c
|
||||
*
|
||||
* @brief
|
||||
* ARM NEON intrinsic variants of
|
||||
* functions used for memory operations
|
||||
*
|
||||
* *******************************************************************************
|
||||
*/
|
||||
#include <arm_neon.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
|
||||
/**
 * Fills a 2D block of bytes with a constant value (NEON).
 *
 * Writes i4_blk_ht rows of i4_blk_wd bytes, each set to u1_val, with
 * consecutive rows i4_dst_stride bytes apart. Widths of 4, 8 and any
 * multiple of 16 use NEON stores; every other width falls back to memset().
 *
 * Every path honours i4_blk_ht. The 4-wide and 8-wide paths previously wrote
 * a fixed 4 or 8 rows regardless of the requested height; the result for 4x4
 * and 8x8 blocks is unchanged.
 *
 * @param pu1_dst        top-left byte of the destination block
 * @param i4_dst_stride  distance in bytes between consecutive rows
 * @param u1_val         value written to every byte of the block
 * @param i4_blk_wd      block width in bytes
 * @param i4_blk_ht      block height in rows
 */
void isvc_memset_2d_neon(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
                         WORD32 i4_blk_ht)
{
    WORD32 i, j;

    if(i4_blk_wd == 4)
    {
        const uint32x2_t u4_fill = vreinterpret_u32_u8(vdup_n_u8(u1_val));

        for(i = 0; i < i4_blk_ht; i++)
        {
            /* Lane 0 carries four copies of u1_val */
            vst1_lane_u32((UWORD32 *) pu1_dst, u4_fill, 0);
            pu1_dst += i4_dst_stride;
        }
    }
    else if(i4_blk_wd == 8)
    {
        const uint8x8_t u1_fill = vdup_n_u8(u1_val);

        for(i = 0; i < i4_blk_ht; i++)
        {
            vst1_u8(pu1_dst, u1_fill);
            pu1_dst += i4_dst_stride;
        }
    }
    else if(i4_blk_wd % 16 == 0)
    {
        const uint8x16_t u1_fill = vdupq_n_u8(u1_val);

        for(i = 0; i < i4_blk_ht; i++)
        {
            for(j = 0; j < i4_blk_wd; j += 16)
            {
                vst1q_u8(pu1_dst + j, u1_fill);
            }

            pu1_dst += i4_dst_stride;
        }
    }
    else
    {
        for(i = 0; i < i4_blk_ht; i++)
        {
            memset(pu1_dst, u1_val, i4_blk_wd);
            pu1_dst += i4_dst_stride;
        }
    }
}
|
||||
1085
common/arm/svc/isvc_resi_trans_quant_neon.c
Normal file
1085
common/arm/svc/isvc_resi_trans_quant_neon.c
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -141,11 +141,16 @@ typedef enum
|
|||
LAST_SIGNIFICANT_COEFF_FLAG_8X8_FRAME = 417,
|
||||
COEFF_ABS_LEVEL_MINUS1_8X8 = 426,
|
||||
SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 436,
|
||||
LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 451
|
||||
LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 451,
|
||||
|
||||
/* SVC related CABAC offsets */
|
||||
BASE_MODE_FLAG = 460,
|
||||
MOTION_PREDICTION_FLAG_L0 = 463,
|
||||
MOTION_PREDICTION_FLAG_L1 = 464,
|
||||
RESIDUAL_PREDICTION_FLAG = 465,
|
||||
|
||||
} cabac_table_num_t;
|
||||
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @enum ctxIdxOffset
|
||||
|
|
|
|||
|
|
@ -135,6 +135,9 @@ enum
|
|||
ISLICE = 2,
|
||||
SPSLICE = 3,
|
||||
SISLICE = 4,
|
||||
EPSLICE = 5,
|
||||
EBSLICE = 6,
|
||||
EISLICE = 7,
|
||||
MAXSLICE_TYPE,
|
||||
};
|
||||
|
||||
|
|
@ -144,27 +147,28 @@ enum
|
|||
* @brief Defines the set of possible nal unit types
|
||||
******************************************************************************
|
||||
*/
|
||||
enum
|
||||
typedef enum NAL_UNIT_TYPE_T
|
||||
{
|
||||
NAL_UNSPEC_0 = 0,
|
||||
NAL_SLICE_NON_IDR = 1,
|
||||
NAL_SLICE_DPA = 2,
|
||||
NAL_SLICE_DPB = 3,
|
||||
NAL_SLICE_DPC = 4,
|
||||
NAL_SLICE_IDR = 5,
|
||||
NAL_SEI = 6,
|
||||
NAL_SPS = 7,
|
||||
NAL_PPS = 8,
|
||||
NAL_AUD = 9,
|
||||
NAL_EOSEQ = 10,
|
||||
NAL_EOSTR = 11,
|
||||
NAL_FILLER = 12,
|
||||
NAL_SPSE = 13,
|
||||
NAL_RES_18 = 14,
|
||||
NAL_AUX_PIC = 19,
|
||||
NAL_RES_23 = 20,
|
||||
NAL_UNSPEC_31 = 24,
|
||||
};
|
||||
NAL_UNSPEC_0 = 0,
|
||||
NAL_SLICE_NON_IDR = 1,
|
||||
NAL_SLICE_DPA = 2,
|
||||
NAL_SLICE_DPB = 3,
|
||||
NAL_SLICE_DPC = 4,
|
||||
NAL_SLICE_IDR = 5,
|
||||
NAL_SEI = 6,
|
||||
NAL_SPS = 7,
|
||||
NAL_PPS = 8,
|
||||
NAL_AUD = 9,
|
||||
NAL_EOSEQ = 10,
|
||||
NAL_EOSTR = 11,
|
||||
NAL_FILLER = 12,
|
||||
NAL_SPSE = 13,
|
||||
NAL_PREFIX = 14,
|
||||
NAL_SUBSET_SPS = 15,
|
||||
NAL_AUX_PIC = 19,
|
||||
NAL_CODED_SLICE_EXTENSION = 20,
|
||||
NAL_UNSPEC_31 = 24,
|
||||
} NAL_UNIT_TYPE_T;
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
|
|
@ -261,27 +265,29 @@ typedef enum
|
|||
*/
|
||||
typedef enum
|
||||
{
|
||||
I16x16 = 0,
|
||||
I4x4 = 1,
|
||||
I8x8 = 2,
|
||||
P16x16 = 3,
|
||||
P16x8 = 4,
|
||||
P8x16 = 5,
|
||||
P8x8 = 6,
|
||||
PSKIP = 7,
|
||||
IPCM = 8,
|
||||
B16x16 = 9,
|
||||
BSKIP = 10,
|
||||
BDIRECT = 11,
|
||||
INVALID_MB_TYPE = -1,
|
||||
I16x16 = 0,
|
||||
I4x4 = 1,
|
||||
I8x8 = 2,
|
||||
P16x16 = 3,
|
||||
P16x8 = 4,
|
||||
P8x16 = 5,
|
||||
P8x8 = 6,
|
||||
PSKIP = 7,
|
||||
IPCM = 8,
|
||||
B16x16 = 9,
|
||||
BSKIP = 10,
|
||||
BDIRECT = 11,
|
||||
BASE_MODE = 12,
|
||||
MAX_MBTYPES,
|
||||
}MBTYPES_T;
|
||||
} MBTYPES_T;
|
||||
|
||||
/* Pred Modes */
|
||||
enum
|
||||
{
|
||||
BLOCK_TYPE_INTER_MB = 0,
|
||||
BLOCK_TYPE_INTRA_MB = 1,
|
||||
BLOCK_TYPE_SKIP_MB = 2
|
||||
BLOCK_TYPE_SKIP_MB = 2
|
||||
};
|
||||
|
||||
/* Prediction list */
|
||||
|
|
@ -521,9 +527,16 @@ typedef enum
|
|||
/* Number of max TU in a MB row */
|
||||
#define MAX_TU_IN_MB_ROW ((MB_SIZE / MIN_TU_SIZE))
|
||||
|
||||
#define MIN_TU_IN_MB_ROW ((MB_SIZE / MAX_TU_SIZE))
|
||||
|
||||
/* Number of max PU in a CTb row */
|
||||
#define MAX_PU_IN_MB_ROW ((MB_SIZE / MIN_PU_SIZE))
|
||||
|
||||
#define MAX_TU_IN_MB_COL MAX_TU_IN_MB_ROW
|
||||
|
||||
#define MIN_TU_IN_MB_COL MIN_TU_IN_MB_ROW
|
||||
|
||||
#define MAX_PU_IN_MB_COL MAX_PU_IN_MB_ROW
|
||||
|
||||
/* Number of max PU in a MB */
|
||||
/*****************************************************************************/
|
||||
|
|
@ -537,7 +550,11 @@ typedef enum
|
|||
#define MAX_TU_IN_MB ((MB_SIZE / MIN_TU_SIZE) * \
|
||||
(MB_SIZE / MIN_TU_SIZE))
|
||||
|
||||
#define MIN_TU_IN_MB (MIN_TU_IN_MB_ROW * MIN_TU_IN_MB_COL)
|
||||
|
||||
#define NUM_4x4_IN_8x8 4
|
||||
|
||||
#define NUM_COEFFS_IN_MIN_TU (MIN_TU_SIZE * MIN_TU_SIZE)
|
||||
|
||||
/**
|
||||
* Maximum transform depths
|
||||
|
|
|
|||
|
|
@ -44,6 +44,8 @@
|
|||
/*Width of a 4x4 block*/
|
||||
#define SUB_BLK_WIDTH_4x4 4
|
||||
|
||||
#define SUB_BLK_HEIGHT_4x4 4
|
||||
|
||||
/*Width of an 8x8 block*/
|
||||
#define SUB_BLK_WIDTH_8x8 8
|
||||
|
||||
|
|
|
|||
6542
common/svc/isvc_cabac_tables.c
Normal file
6542
common/svc/isvc_cabac_tables.c
Normal file
File diff suppressed because it is too large
Load diff
57
common/svc/isvc_cabac_tables.h
Normal file
57
common/svc/isvc_cabac_tables.h
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file isvc_cabac_tables.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains enumerations, macros and extern declarations of H264
|
||||
* cabac tables
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVC_CABAC_TABLES_H_
|
||||
#define _ISVC_CABAC_TABLES_H_
|
||||
|
||||
#include "ih264_cabac_tables.h"
|
||||
/**
|
||||
******************************************************************************
|
||||
* @brief number of cabac contexts used for SVC encoding (indices 0-466)
|
||||
******************************************************************************
|
||||
*/
|
||||
#define NUM_SVC_CABAC_CTXTS 467
|
||||
|
||||
extern const UWORD32 (*gau4_isvc_cabac_table)[4];
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Cabac tables for context initialization depending upon type of Slice, */
|
||||
/* cabac init Idc value and Qp. */
|
||||
/*****************************************************************************/
|
||||
extern const UWORD8 gau1_isvc_cabac_ctxt_init_table[NUM_CAB_INIT_IDC_PLUS_ONE][QP_RANGE]
|
||||
[NUM_SVC_CABAC_CTXTS];
|
||||
|
||||
#endif
|
||||
81
common/svc/isvc_common_tables.c
Normal file
81
common/svc/isvc_common_tables.c
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_common_tables.c
|
||||
*
|
||||
* @brief
|
||||
* Contains common global tables
|
||||
*
|
||||
* @author
|
||||
* Harish M
|
||||
*
|
||||
* @par List of Functions:
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* User include files */
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "ih264_common_tables.h"
|
||||
#include "isvc_common_tables.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Extern global definitions */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @brief while encoding, based on the input configuration parameters, the
|
||||
* level of the bitstream is computed using the table below.
|
||||
* input : table_idx
|
||||
* output : level_idc or cpb size
|
||||
* @remarks Table A-1 – level table limits
|
||||
******************************************************************************
|
||||
*/
|
||||
/* One entry per supported level, ordered from level 1 to level 5.1. */
/* NOTE(review): the columns appear to follow Table A-1 of the H.264 spec: */
/* level_idc, MaxMBPS, MaxFS, MaxDpbMbs, MaxBR, MaxCPB, MaxVmvR - confirm */
/* against the field order of level_tables_t. */
const level_tables_t gas_isvc_lvl_tbl[16] = {
|
||||
{IH264_LEVEL_10, 1485, 99, 396, 64, 175, 64},
|
||||
{IH264_LEVEL_1B, 1485, 99, 396, 128, 350, 64},
|
||||
{IH264_LEVEL_11, 3000, 396, 900, 192, 500, 128},
|
||||
{IH264_LEVEL_12, 6000, 396, 2376, 384, 1000, 128},
|
||||
{IH264_LEVEL_13, 11880, 396, 2376, 768, 2000, 128},
|
||||
{IH264_LEVEL_20, 11880, 396, 2376, 2000, 2000, 128},
|
||||
{IH264_LEVEL_21, 19800, 792, 4752, 4000, 4000, 256},
|
||||
{IH264_LEVEL_22, 20250, 1620, 8100, 4000, 4000, 256},
|
||||
{IH264_LEVEL_30, 40500, 1620, 8100, 10000, 10000, 256},
|
||||
{IH264_LEVEL_31, 108000, 3600, 18000, 14000, 14000, 512},
|
||||
{IH264_LEVEL_32, 216000, 5120, 20480, 20000, 20000, 512},
|
||||
{IH264_LEVEL_40, 245760, 8192, 32768, 20000, 25000, 512},
|
||||
{IH264_LEVEL_41, 245760, 8192, 32768, 50000, 62500, 512},
|
||||
{IH264_LEVEL_42, 522240, 8704, 34816, 50000, 62500, 512},
|
||||
{IH264_LEVEL_50, 589824, 22080, 110400, 135000, 135000, 512},
|
||||
{IH264_LEVEL_51, 983040, 36864, 184320, 240000, 240000, 512},
|
||||
};
|
||||
50
common/svc/isvc_common_tables.h
Normal file
50
common/svc/isvc_common_tables.h
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_common_tables.h
|
||||
*
|
||||
* @brief
|
||||
* Common tables
|
||||
*
|
||||
* @author
|
||||
* Harish
|
||||
*
|
||||
* @par List of Functions:
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVC_COMMON_TABLES_H_
|
||||
#define _ISVC_COMMON_TABLES_H_
|
||||
|
||||
/* Dependencies of ih264_common_tables.h */
|
||||
#include "ih264_defs.h"
|
||||
#include "ih264_structs.h"
|
||||
|
||||
#include "ih264_common_tables.h"
|
||||
|
||||
extern const level_tables_t gas_isvc_lvl_tbl[16];
|
||||
|
||||
#endif
|
||||
88
common/svc/isvc_defs.h
Normal file
88
common/svc/isvc_defs.h
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_defs.h
|
||||
*
|
||||
* @brief
|
||||
* Contains macro definitions, and other typedefs used for SVC encoding
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVC_DEFS_H_
|
||||
#define _ISVC_DEFS_H_
|
||||
|
||||
#define MAX_NUM_TEMPORAL_LAYERS 3
|
||||
|
||||
#define MAX_NUM_SPATIAL_LAYERS 3
|
||||
|
||||
#define MAX_VUI_EXT_NUM_ENTRIES (MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS)
|
||||
|
||||
#define SVC_INTER_MB (1 << 0) /*!< P or B MBs decoded or inferred */
|
||||
|
||||
#define SVC_INTRA_MB (1 << 1) /*!< Intra MBs other than IPCM and I_BL */
|
||||
|
||||
#define SVC_IPCM_MB (1 << 2) /*!< IPCM_MB decoded or inferred */
|
||||
|
||||
#define SVC_IBL_MB (1 << 3) /*!< I_BL MB always inferred */
|
||||
|
||||
#define SVC_INTRA_INTER_MB \
|
||||
(1 << 4) /*!< Intra Inter MB will have an alternate prediction \
|
||||
process*/
|
||||
|
||||
#define MB_WIDTH_SHIFT 4
|
||||
|
||||
#define MB_HEIGHT_SHIFT 4
|
||||
|
||||
#define UV 1
|
||||
|
||||
#define NUM_SP_COMPONENTS 2
|
||||
|
||||
#define NUM_COMPONENTS 3
|
||||
|
||||
#define SVC_EXTRACT_MB_MODE(x) ((x) &0x1F)
|
||||
|
||||
#define GET_BIT_TX_SIZE(x, y) ((x) & (1 << (7 - (y))))
|
||||
|
||||
/* profile_idc values of the scalable (SVC) profiles of H.264 */
typedef enum SVC_PROFILES_T
|
||||
{
|
||||
IH264_SCALABLE_BASELINE = 83, /* 'Scalable Baseline' profile */
|
||||
IH264_SCALABLE_HIGH_PROFILE = 86 /* presumably 'Scalable High' profile - confirm against H.264 Annex G */
|
||||
} SVC_PROFILES_T;
|
||||
|
||||
/* Prediction direction of an inter-predicted partition */
/* NOTE(review): L0/L1/BI presumably mean reference list 0, list 1 and bi-prediction - confirm with users of this enum */
typedef enum PRED_MODE_T
|
||||
{
|
||||
L0 = 0,
|
||||
L1 = 1,
|
||||
BI = 2,
|
||||
NUM_PRED_DIRS = 2, /* count, not a mode: shares the numeric value 2 with BI */
|
||||
INVALID_PRED_MODE = 4,
|
||||
} PRED_MODE_T;
|
||||
|
||||
#endif
|
||||
219
common/svc/isvc_inter_pred_filters.h
Normal file
219
common/svc/isvc_inter_pred_filters.h
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_inter_pred_filters.h
|
||||
*
|
||||
* @brief
|
||||
* Declarations of functions used for inter prediction
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* -ih264_inter_pred_luma_copy
|
||||
* -ih264_interleave_copy
|
||||
* -ih264_inter_pred_luma_horz
|
||||
* -ih264_inter_pred_luma_vert
|
||||
* -ih264_inter_pred_luma_horz_hpel_vert_hpel
|
||||
* -ih264_inter_pred_luma_vert_qpel
|
||||
* -ih264_inter_pred_luma_horz_qpel
|
||||
* -ih264_inter_pred_luma_horz_qpel_vert_qpel
|
||||
* -ih264_inter_pred_luma_horz_qpel_vert_hpel
|
||||
* -ih264_inter_pred_luma_horz_hpel_vert_qpel
|
||||
* -ih264_inter_pred_luma_bilinear
|
||||
* -ih264_inter_pred_chroma
|
||||
* -ih264_inter_pred_luma_copy_a9q
|
||||
* -ih264_interleave_copy_a9
|
||||
* -ih264_inter_pred_luma_horz_a9q
|
||||
* -ih264_inter_pred_luma_vert_a9q
|
||||
* -ih264_inter_pred_luma_bilinear_a9q
|
||||
* -ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q
|
||||
* -ih264_inter_pred_luma_horz_qpel_a9q
|
||||
* -ih264_inter_pred_luma_vert_qpel_a9q
|
||||
* -ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q
|
||||
* -ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q
|
||||
* -ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q
|
||||
* -ih264_inter_pred_chroma_a9q
|
||||
* -ih264_inter_pred_luma_copy_av8
|
||||
* -ih264_interleave_copy_av8
|
||||
* -ih264_inter_pred_luma_horz_av8
|
||||
* -ih264_inter_pred_luma_vert_av8
|
||||
* -ih264_inter_pred_luma_bilinear_av8
|
||||
* -ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
|
||||
* -ih264_inter_pred_luma_horz_qpel_av8
|
||||
* -ih264_inter_pred_luma_vert_qpel_av8
|
||||
* -ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
|
||||
* -ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
|
||||
* -ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
|
||||
* -ih264_inter_pred_chroma_av8
|
||||
* -ih264_inter_pred_chroma_dx_zero_av8
|
||||
* -ih264_inter_pred_chroma_dy_zero_av8
|
||||
* -ih264_inter_pred_luma_copy_ssse3
|
||||
* -ih264_inter_pred_luma_copy_ssse3
|
||||
* -ih264_inter_pred_luma_horz_ssse3
|
||||
* -ih264_inter_pred_luma_vert_ssse3
|
||||
* -ih264_inter_pred_luma_bilinear_ssse3
|
||||
* -ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3
|
||||
* -ih264_inter_pred_luma_horz_qpel_ssse3
|
||||
* -ih264_inter_pred_luma_vert_qpel_ssse3
|
||||
* -ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3
|
||||
* -ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3
|
||||
* -ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3
|
||||
* -ih264_inter_pred_chroma_ssse3
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVC_INTER_PRED_FILTERS_H_
|
||||
#define _ISVC_INTER_PRED_FILTERS_H_
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Constant Data variables */
|
||||
/*****************************************************************************/
|
||||
|
||||
extern const WORD32 ih264_g_six_tap[3]; /* coefficients for 6 tap filtering*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Extern Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
typedef void FT_INTER_PRED_LUMA(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd,
|
||||
WORD32 ht, WORD32 wd, UWORD8 *pu1_tmp, WORD32 dydx);
|
||||
|
||||
typedef void FT_INTERLEAVE_COPY(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd,
|
||||
WORD32 ht, WORD32 wd);
|
||||
|
||||
typedef void FT_INTER_PRED_LUMA_BILINEAR(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst,
|
||||
WORD32 src_strd1, WORD32 src_strd2, WORD32 dst_strd,
|
||||
WORD32 height, WORD32 width);
|
||||
|
||||
typedef void FT_INTER_PRED_CHROMA(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd,
|
||||
WORD32 dst_strd, WORD32 dx, WORD32 dy, WORD32 ht, WORD32 wd);
|
||||
|
||||
/* No NEON Declarations */
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy;
|
||||
|
||||
FT_INTERLEAVE_COPY ih264_interleave_copy;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel;
|
||||
|
||||
FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear;
|
||||
|
||||
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma;
|
||||
|
||||
/* A9 NEON Declarations */
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_a9q;
|
||||
|
||||
FT_INTERLEAVE_COPY ih264_interleave_copy_a9;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_a9q;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_a9q;
|
||||
|
||||
FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear_a9q;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_a9q;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_a9q;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q;
|
||||
|
||||
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_a9q;
|
||||
|
||||
/* AV8 NEON Declarations */
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_av8;
|
||||
|
||||
FT_INTERLEAVE_COPY ih264_interleave_copy_av8;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_av8;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_av8;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_av8;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_av8;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_av8;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_av8;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_av8;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_av8;
|
||||
|
||||
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_av8;
|
||||
|
||||
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_dx_zero_av8;
|
||||
|
||||
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_dy_zero_av8;
|
||||
|
||||
/* SSSE3 Intrinsic Declarations */
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_ssse3;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_ssse3;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_ssse3;
|
||||
|
||||
FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear_ssse3;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_ssse3;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_ssse3;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3;
|
||||
|
||||
FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3;
|
||||
|
||||
FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_ssse3;
|
||||
|
||||
/** Nothing past this point */
|
||||
|
||||
#endif
|
||||
3257
common/svc/isvc_intra_resample.c
Normal file
3257
common/svc/isvc_intra_resample.c
Normal file
File diff suppressed because it is too large
Load diff
251
common/svc/isvc_intra_resample.h
Normal file
251
common/svc/isvc_intra_resample.h
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
#ifndef _ISVC_INTRA_RESAMPLE_H_
|
||||
#define _ISVC_INTRA_RESAMPLE_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "isvc_structs.h"
|
||||
|
||||
#define DYADIC_REF_W_Y 20
|
||||
#define DYADIC_REF_H_Y 20
|
||||
#define DYADIC_REF_W_C 10
|
||||
#define DYADIC_REF_H_C 10
|
||||
|
||||
#define MAX_NUM_RES_LYRS 4
|
||||
|
||||
#define MAX_PIX_FILL_LUMA 4
|
||||
#define MAX_PIX_FILL_CHROMA 2
|
||||
|
||||
#define MAX_REF_ARR_WD_HT 48
|
||||
#define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_SIZE)
|
||||
|
||||
#define CLIPUCHAR(x) CLIP3(0, 255, (x))
|
||||
|
||||
#define REF_ARRAY_WIDTH 48
|
||||
#define REF_ARRAY_HEIGHT 48
|
||||
|
||||
typedef void FT_INTERPOLATE_LUMA_2X(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
|
||||
UWORD8 *pu1_out_buf, WORD32 i4_out_stride);
|
||||
|
||||
typedef void FT_VERT_INTERPOLATE_CHROMA_2X(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
|
||||
WORD32 i4_phase_0, WORD32 i4_phase_1);
|
||||
|
||||
typedef void FT_HORZ_INTERPOLATE_CHROMA_2X(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf,
|
||||
WORD32 i4_out_stride, WORD32 i4_phase_0,
|
||||
WORD32 i4_phase_1);
|
||||
|
||||
typedef struct mem_element_t
|
||||
{
|
||||
/* Buffer pointer */
|
||||
void *pv_buffer;
|
||||
|
||||
/* size of the structure or unit */
|
||||
WORD32 i4_element_size;
|
||||
|
||||
/* Stride of buffer in terms of number of elements.*/
|
||||
WORD32 i4_num_element_stride;
|
||||
} mem_element_t;
|
||||
|
||||
typedef struct seg_description_t
|
||||
{
|
||||
/* describes segment dimension */
|
||||
UWORD8 u1_seg_dim;
|
||||
|
||||
/* describes offset from start */
|
||||
UWORD8 u1_seg_off;
|
||||
|
||||
/* describes whether mb is adjoining the segment
|
||||
0 => not adjoining 1 => adjoining */
|
||||
UWORD8 u1_mb_adjoin;
|
||||
|
||||
/* distance to nearest MB */
|
||||
WORD8 i1_dist_idx;
|
||||
|
||||
/* describes the nearest mb boundary
|
||||
+1 => rightMB/bottomMB
|
||||
-1 => leftMB/topMB */
|
||||
WORD8 i1_nearst_mb_bdry;
|
||||
} seg_description_t;
|
||||
|
||||
/* Lookup entry holding a segment count, a start-position indicator and up to four per-segment descriptions */
typedef struct seg_lookup_desc_t
|
||||
{
|
||||
/* place holder to store the number of segments */
|
||||
UWORD8 u1_num_segments;
|
||||
|
||||
/* indicates where the start location of the segment lies with respect to
|
||||
the block width (less than or greater than the block width).
NOTE(review): declared as UWORD8 although the 'u4_' prefix suggests a
32-bit field; confirm the intended width */
|
||||
UWORD8 u4_start_pos;
|
||||
|
||||
/* place holder to store per segment description */
|
||||
seg_description_t s_segments[4];
|
||||
} seg_lookup_desc_t;
|
||||
|
||||
typedef struct intra_samp_lyr_ctxt
|
||||
{
|
||||
/* mb position */
|
||||
coordinates_t *ps_mb_pos;
|
||||
|
||||
/* reference layer width in terms luma samples */
|
||||
WORD32 i4_ref_width;
|
||||
|
||||
/* reference layer height in terms luma samples */
|
||||
WORD32 i4_ref_height;
|
||||
|
||||
/* Constrained intra resampling flag. Range is [0,1]. */
|
||||
WORD8 i1_constrained_intra_rsmpl_flag;
|
||||
|
||||
/* Chroma xPhase for even values of x for dyadic cases */
|
||||
WORD32 i4_x_phase_0;
|
||||
|
||||
/* Chroma xPhase for odd values of x for dyadic cases */
|
||||
WORD32 i4_x_phase_1;
|
||||
|
||||
/* Chroma yPhase for even values of y for dyadic cases */
|
||||
WORD32 i4_y_phase_0;
|
||||
|
||||
/* Chroma yPhase for odd values of y for dyadic cases */
|
||||
WORD32 i4_y_phase_1;
|
||||
|
||||
FT_INTERPOLATE_LUMA_2X *pf_interpolate_luma;
|
||||
|
||||
FT_VERT_INTERPOLATE_CHROMA_2X *pf_vert_interpol_chroma;
|
||||
|
||||
FT_HORZ_INTERPOLATE_CHROMA_2X *pf_horz_interpol_chroma;
|
||||
|
||||
WORD16 i2_x_min_pos;
|
||||
|
||||
WORD16 i2_x_max_pos;
|
||||
|
||||
WORD16 i2_y_min_pos;
|
||||
|
||||
WORD16 i2_y_max_pos;
|
||||
|
||||
coordinates_t *ps_phase;
|
||||
|
||||
WORD32 *pi4_ref_array_positions_x;
|
||||
|
||||
WORD32 *pi4_ref_array_positions_y;
|
||||
|
||||
coordinates_t *ps_offsets;
|
||||
|
||||
coordinates_t *ps_ref_array_dims;
|
||||
|
||||
/* buffers to store lookup for horizontal segment description */
|
||||
seg_lookup_desc_t as_seg_lookup_horz[MB_SIZE];
|
||||
|
||||
/* buffers to store lookup for vertical segment description */
|
||||
seg_lookup_desc_t as_seg_lookup_vert[MB_SIZE];
|
||||
|
||||
/* buffers to store lookup for x indexes to get
|
||||
availability from 4x4 availability grid */
|
||||
UWORD8 au1_refarray_x_idx[MAX_REF_IDX_ARRAY];
|
||||
|
||||
/* buffers to store lookup for y indexes to get
|
||||
availability from 4x4 availability grid */
|
||||
UWORD8 au1_refarray_y_idx[MAX_REF_IDX_ARRAY];
|
||||
} intra_samp_lyr_ctxt;
|
||||
|
||||
/* Intra resampling context: one context per resolution layer plus the buffers and dimensions declared below */
typedef struct intra_sampling_ctxt_t
|
||||
{
|
||||
/* Array of resolution layer ctxt. */
|
||||
intra_samp_lyr_ctxt as_res_lyrs[MAX_NUM_RES_LYRS];
|
||||
|
||||
/* pointer to array of SPS */
|
||||
void *ps_sps;
|
||||
|
||||
/* buffer to store the reference layer data before intra sampling */
|
||||
UWORD8 *pu1_refarray_buffer;
|
||||
|
||||
/* buffer to hold the reference layer Cb data before intra
|
||||
resampling (used for dyadic cases only) */
|
||||
UWORD8 *pu1_refarray_cb;
|
||||
|
||||
/* buffer to hold the reference layer Cr data before intra
|
||||
resampling (used for dyadic cases only) */
|
||||
UWORD8 *pu1_refarray_cr;
|
||||
|
||||
/* intermediate buffer for interpolation */
|
||||
WORD32 *pi4_temp_interpolation_buffer;
|
||||
|
||||
/* resolution id of the layer which is to be processed */
|
||||
WORD32 i4_res_lyr_id;
|
||||
|
||||
/* reference layer width in terms luma samples */
|
||||
WORD32 i4_ref_width;
|
||||
|
||||
/* NOTE(review): presumably the stride of the refarray buffer(s) above; the previous comment here was a copy of the i4_ref_width description - confirm */
|
||||
WORD32 i4_refarray_stride;
|
||||
|
||||
/* reference layer height in terms luma samples */
|
||||
WORD32 i4_ref_height;
|
||||
} intra_sampling_ctxt_t;
|
||||
|
||||
/* Per-MB parameters: chroma and luma NNZ bit masks and the packed MB mode */
typedef struct inter_lyr_mb_prms_t
|
||||
{
|
||||
/* NNZs of Chroma. Here each bit corresponds
|
||||
to the NNZ of a 4x4 sub block. Lower 4 bits are
|
||||
used for Cb and upper are used for Cr */
|
||||
UWORD8 u1_chroma_nnz;
|
||||
|
||||
/* NNZs of Luma. Here each bit corresponds
|
||||
to the NNZ of a 4x4 sub block in raster scan order. */
|
||||
UWORD16 u2_luma_nnz;
|
||||
|
||||
/* Packed MB mode transform size of an MB */
|
||||
WORD8 i1_mb_mode;
|
||||
} inter_lyr_mb_prms_t;
|
||||
|
||||
/* Function declarations */
|
||||
extern void isvc_intra_samp_mb_dyadic(void *pv_intra_samp_ctxt, mem_element_t *ps_ref_luma,
|
||||
mem_element_t *ps_ref_chroma,
|
||||
mem_element_t *ps_ref_mb_mode_map,
|
||||
mem_element_t *ps_curr_luma, mem_element_t *ps_curr_chroma,
|
||||
UWORD16 u2_mb_x, UWORD16 u2_mb_y,
|
||||
WORD32 i4_scaled_ref_layer_left_offset,
|
||||
WORD32 i4_scaled_ref_layer_top_offset);
|
||||
|
||||
extern void isvc_intra_samp_mb(void *pv_intra_samp_ctxt_luma, void *pv_intra_samp_ctxt_chroma,
|
||||
mem_element_t *ps_ref_luma, mem_element_t *ps_ref_chroma,
|
||||
mem_element_t *ps_ref_mb_mode_map, mem_element_t *ps_curr_luma,
|
||||
mem_element_t *ps_curr_chroma);
|
||||
|
||||
extern void isvc_intra_resamp_generate_segment_lookup(seg_lookup_desc_t *ps_seg_lookup_table,
|
||||
WORD32 i4_dimension, WORD32 i4_mb_size,
|
||||
WORD32 i4_shift_val);
|
||||
|
||||
/* C Declarations */
|
||||
extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic;
|
||||
extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic;
|
||||
extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic;
|
||||
|
||||
/* SSE42 Declarations */
|
||||
extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic_sse42;
|
||||
extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic_sse42;
|
||||
extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic_sse42;
|
||||
|
||||
/* NEON Declarations */
|
||||
extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic_neon;
|
||||
extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic_neon;
|
||||
extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic_neon;
|
||||
|
||||
#endif
|
||||
1094
common/svc/isvc_iquant_itrans_recon.c
Normal file
1094
common/svc/isvc_iquant_itrans_recon.c
Normal file
File diff suppressed because it is too large
Load diff
37
common/svc/isvc_macros.h
Normal file
37
common/svc/isvc_macros.h
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_macros.h
|
||||
*
|
||||
* @brief
|
||||
* Contains macro definitions used in SVC
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVC_MACROS_H_
|
||||
#define _ISVC_MACROS_H_
|
||||
|
||||
#define FORCEINLINE __attribute__((always_inline)) inline
|
||||
|
||||
#endif
|
||||
317
common/svc/isvc_mem_fns.c
Normal file
317
common/svc/isvc_mem_fns.c
Normal file
|
|
@ -0,0 +1,317 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_mem_fns.c
|
||||
*
|
||||
* @brief
|
||||
* Functions used for memory operations
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* isvc_copy_2d()
* isvc_memset_2d()
* isvc_interleaved_copy()
* isvc_16bit_interleaved_copy()
* isvc_16bit_interleaved_memset()
* isvc_is_nonzero_blk()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
/* System include files */
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* User include files */
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
|
||||
/**
|
||||
********************************************************************************
|
||||
* @brief copies a 2d blk from one location to another
|
||||
*
|
||||
* @param[out] pu1_dst : dst pointer
|
||||
*
|
||||
* @param[in] i4_dst_stride: stride of destination
|
||||
*
|
||||
* @param[in] pu1_src : src ptr
|
||||
*
|
||||
* @param[in] i4_src_stride: stride of src
|
||||
*
|
||||
* @param[in] i4_blk_wd : blk width
|
||||
*
|
||||
* @param[in] i4_blk_ht : blk height
|
||||
*
|
||||
* @return void
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
void isvc_copy_2d(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src, WORD32 i4_src_stride,
|
||||
WORD32 i4_blk_wd, WORD32 i4_blk_ht)
|
||||
{
|
||||
WORD32 i;
|
||||
|
||||
for(i = 0; i < i4_blk_ht; i++)
|
||||
{
|
||||
memmove(pu1_dst, pu1_src, i4_blk_wd * sizeof(pu1_dst[0]));
|
||||
|
||||
pu1_dst += i4_dst_stride;
|
||||
pu1_src += i4_src_stride;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
********************************************************************************
|
||||
* @brief memsets a 2d blk
|
||||
*
|
||||
* @param[out] pu1_dst : dst pointer
|
||||
*
|
||||
* @param[in] i4_dst_stride: stride of destination
|
||||
*
|
||||
* @param[in] i4_blk_wd : blk width
|
||||
*
|
||||
* @param[in] i4_blk_ht : blk height
|
||||
*
|
||||
* @return void
|
||||
********************************************************************************
|
||||
*/
|
||||
void isvc_memset_2d(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
|
||||
WORD32 i4_blk_ht)
|
||||
{
|
||||
WORD32 i;
|
||||
|
||||
for(i = 0; i < i4_blk_ht; i++)
|
||||
{
|
||||
memset(pu1_dst, u1_val, i4_blk_wd);
|
||||
|
||||
pu1_dst += i4_dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Function for copying to an interleaved destination
|
||||
*
|
||||
* @par Description:
|
||||
* Copies the array of width 'wd' and height 'ht' from the location pointed
|
||||
* by 'src' to the location pointed by 'dst'
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* UWORD8 pointer to the source
|
||||
*
|
||||
* @param[out] pu1_dst
|
||||
* UWORD8 pointer to the destination
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* integer source stride
|
||||
*
|
||||
* @param[in] dst_strd
|
||||
* integer destination stride
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* The alternate elements of src will be copied to alternate locations in dsr
|
||||
* Other locations are not touched
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvc_interleaved_copy(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd,
|
||||
WORD32 ht, WORD32 wd)
|
||||
{
|
||||
WORD32 row, col;
|
||||
wd *= 2;
|
||||
|
||||
for(row = 0; row < ht; row++)
|
||||
{
|
||||
for(col = 0; col < wd; col += 2)
|
||||
{
|
||||
pu1_dst[col] = pu1_src[col];
|
||||
}
|
||||
|
||||
pu1_src += src_strd;
|
||||
pu1_dst += dst_strd;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Function for copying to an interleaved destination
|
||||
*
|
||||
* @par Description:
|
||||
* Copies the array of width 'wd' and height 'ht' from the location pointed
|
||||
* by 'src' to the location pointed by 'dst'
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* UWORD8 pointer to the source
|
||||
*
|
||||
* @param[out] pu1_dst
|
||||
* UWORD8 pointer to the destination
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* integer source stride
|
||||
*
|
||||
* @param[in] dst_strd
|
||||
* integer destination stride
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* The alternate elements of src will be copied to alternate locations in dsr
|
||||
* Other locations are not touched
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvc_16bit_interleaved_copy(WORD16 *pi2_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 dst_strd,
|
||||
WORD32 ht, WORD32 wd)
|
||||
{
|
||||
WORD32 row, col;
|
||||
wd *= 2;
|
||||
|
||||
for(row = 0; row < ht; row++)
|
||||
{
|
||||
for(col = 0; col < wd; col += 2)
|
||||
{
|
||||
pi2_dst[col] = pi2_src[col];
|
||||
}
|
||||
|
||||
pi2_src += src_strd;
|
||||
pi2_dst += dst_strd;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Function for memsetting to an interleaved destination
|
||||
*
|
||||
* @par Description:
|
||||
* Memsets the array of width 'wd' and height 'ht' pointed by 'src'
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* UWORD8 pointer to the source
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* integer source stride
|
||||
*
|
||||
* @param[in] value
|
||||
* Value to set
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* The alternate elements of src will be copied to alternate locations in dsr
|
||||
* Other locations are not touched
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvc_16bit_interleaved_memset(WORD16 *pi2_src, WORD32 i4_src_strd, WORD16 i2_value,
|
||||
WORD32 i4_wd, WORD32 i4_ht)
|
||||
{
|
||||
WORD32 row, col;
|
||||
|
||||
i4_wd *= 2;
|
||||
|
||||
for(row = 0; row < i4_ht; row++)
|
||||
{
|
||||
for(col = 0; col < i4_wd; col += 2)
|
||||
{
|
||||
pi2_src[col] = i2_value;
|
||||
}
|
||||
|
||||
pi2_src += i4_src_strd;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Checks if any pixel in a block is non-zero
|
||||
*
|
||||
* @param[in] pu1_data
|
||||
* UWORD8 pointer to the block to be checked
|
||||
*
|
||||
* @param[in] i4_data_strd
|
||||
* Stride of data buffer
|
||||
*
|
||||
* @param[in] u4_wd
|
||||
* Width of the block
|
||||
*
|
||||
* @param[in] u4_ht
|
||||
* Height of the block
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
UWORD8 isvc_is_nonzero_blk(UWORD8 *pu1_data, WORD32 i4_data_strd, UWORD32 u4_wd, UWORD32 u4_ht)
|
||||
{
|
||||
UWORD32 i, j;
|
||||
|
||||
for(i = 0; i < u4_ht; i++)
|
||||
{
|
||||
for(j = 0; j < u4_wd; j++)
|
||||
{
|
||||
if(pu1_data[j + i * i4_data_strd])
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
109
common/svc/isvc_mem_fns.h
Normal file
109
common/svc/isvc_mem_fns.h
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_mem_fns.h
|
||||
*
|
||||
* @brief
|
||||
* Function declarations used for memory functions
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#ifndef _ISVC_MEM_FNS_H_
|
||||
#define _ISVC_MEM_FNS_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
|
||||
typedef void *FT_MEM_ALLOC(UWORD32 u4_size);
|
||||
|
||||
typedef void FT_MEM_FREE(void *pv_mem);
|
||||
|
||||
typedef void FT_MEMCPY(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
|
||||
|
||||
typedef void FT_COPY_2D(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src,
|
||||
WORD32 i4_src_stride, WORD32 i4_blk_wd, WORD32 i4_blk_ht);
|
||||
|
||||
typedef void FT_MEMSET_2D(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
|
||||
WORD32 i4_blk_ht);
|
||||
|
||||
typedef void FT_MEMSET(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
|
||||
|
||||
typedef void FT_MEMSET_16BIT(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words);
|
||||
|
||||
typedef void FT_16BIT_INTERLEAVED_COPY(WORD16 *pi2_src, WORD16 *pi2_dst, WORD32 src_strd,
|
||||
WORD32 dst_strd, WORD32 ht, WORD32 wd);
|
||||
|
||||
typedef void FT_16BIT_INTERLEAVED_MEMSET(WORD16 *pi2_src, WORD32 i4_src_strd, WORD16 i2_value,
|
||||
WORD32 i4_wd, WORD32 i4_ht);
|
||||
|
||||
typedef UWORD8 FT_NONZERO_CHECKER(UWORD8 *pu1_data, WORD32 i4_data_strd, UWORD32 u4_wd,
|
||||
UWORD32 u4_ht);
|
||||
|
||||
/* C function declarations */
|
||||
extern FT_MEMCPY ih264_memcpy;
|
||||
extern FT_MEMCPY ih264_memcpy_mul_8;
|
||||
extern FT_MEMSET ih264_memset;
|
||||
extern FT_MEMSET ih264_memset_mul_8;
|
||||
extern FT_MEMSET_16BIT ih264_memset_16bit;
|
||||
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8;
|
||||
extern FT_COPY_2D isvc_copy_2d;
|
||||
extern FT_MEMSET_2D isvc_memset_2d;
|
||||
extern FT_16BIT_INTERLEAVED_COPY isvc_16bit_interleaved_copy;
|
||||
extern FT_16BIT_INTERLEAVED_MEMSET isvc_16bit_interleaved_memset;
|
||||
extern FT_NONZERO_CHECKER isvc_is_nonzero_blk;
|
||||
extern FT_MEM_ALLOC isvc_memory_alloc;
|
||||
extern FT_MEM_FREE isvc_memory_free;
|
||||
|
||||
/* A9 Q function declarations */
|
||||
extern FT_MEMCPY isvc_memcpy_a9q;
|
||||
extern FT_MEMCPY ih264_memcpy_mul_8_a9q;
|
||||
extern FT_MEMSET ih264_memset_a9q;
|
||||
extern FT_MEMSET ih264_memset_mul_8_a9q;
|
||||
extern FT_MEMSET_16BIT ih264_memset_16bit_a9q;
|
||||
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_a9q;
|
||||
|
||||
/* AV8 function declarations */
|
||||
extern FT_MEMCPY ih264_memcpy_av8;
|
||||
extern FT_MEMCPY ih264_memcpy_mul_8_av8;
|
||||
extern FT_MEMSET ih264_memset_av8;
|
||||
extern FT_MEMSET ih264_memset_mul_8_av8;
|
||||
extern FT_MEMSET_16BIT ih264_memset_16bit_av8;
|
||||
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_av8;
|
||||
|
||||
/* NEON function declarations */
|
||||
extern FT_MEMSET_2D isvc_memset_2d_neon;
|
||||
|
||||
/* SSSE3 variants */
|
||||
extern FT_MEMCPY ih264_memcpy_mul_8_ssse3;
|
||||
extern FT_MEMSET ih264_memset_mul_8_ssse3;
|
||||
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_ssse3;
|
||||
extern FT_COPY_2D isvc_copy_2d_ssse3;
|
||||
|
||||
/* SSE4.2 variants */
|
||||
extern FT_MEMSET_2D isvc_memset_2d_sse42;
|
||||
|
||||
#endif
|
||||
840
common/svc/isvc_resi_trans_quant.c
Normal file
840
common/svc/isvc_resi_trans_quant.c
Normal file
|
|
@ -0,0 +1,840 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_resi_trans_quant.c
|
||||
*
|
||||
* @brief
|
||||
* Contains function definitions single stage forward transform for H.264
|
||||
* It will calculate the residue, do the cf and then do quantization
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - ih264_resi_trans_quant_4x4()
|
||||
* - ih264_resi_trans_quant_chroma_4x4
|
||||
* - ih264_hadamard_quant_4x4
|
||||
* - ih264_hadamard_quant_2x2_uv
|
||||
* - ih264_resi_trans_quant_8x8
|
||||
*
|
||||
* @remarks
|
||||
*******************************************************************************
|
||||
*/
|
||||
/* System include files */
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* User include files */
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_defs.h"
|
||||
#include "ih264_size_defs.h"
|
||||
#include "ih264_macros.h"
|
||||
#include "ih264_trans_macros.h"
|
||||
#include "ih264_trans_data.h"
|
||||
#include "ih264_structs.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
|
||||
/* Returns (i2_residue - i2_upsampled_res), passed through CLIP3 with the */
/* bounds -UINT8_MAX and UINT8_MAX                                        */
static FORCEINLINE WORD16 isvc_subtract_upsampled_res(WORD16 i2_residue, WORD16 i2_upsampled_res)
{
    const WORD32 i4_limit = (WORD16) UINT8_MAX;
    const WORD32 i4_diff = i2_residue - i2_upsampled_res;

    return CLIP3(-i4_limit, i4_limit, i4_diff);
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function performs forward transform and quantization on a 4*4 block
|
||||
*
|
||||
* @par Description:
|
||||
* The function accepts source buffer and estimation buffer. From these, it
|
||||
* computes the residue. This is residue is then transformed and quantized.
|
||||
* The transform and quantization are in placed computed. They use the residue
|
||||
* buffer for this.
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* Pointer to source sub-block
|
||||
*
|
||||
* @param[in] pu1_pred
|
||||
* Pointer to prediction sub-block
|
||||
*
|
||||
* @param[in] pi2_out
|
||||
* Pointer to residual sub-block
|
||||
*
|
||||
* @param[in] i4_src_stride
|
||||
* Source stride
|
||||
*
|
||||
* @param[in] i4_pred_stride
|
||||
* Prediction stride
|
||||
*
|
||||
* @param[in] dst_strd
|
||||
* Destination stride
|
||||
*
|
||||
* @param[in] u4_qbits
|
||||
* QP_BITS_h264_4x4 + floor(QP/6)
|
||||
*
|
||||
* @param[in] pu2_threshold_matrix
|
||||
* Pointer to Forward Quant Threshold Matrix
|
||||
*
|
||||
* @param[in] pu2_scale_matrix
|
||||
* Pointer to Forward Quant Scale Matrix
|
||||
*
|
||||
* @param[in] u4_round_factor
|
||||
* Quantization Round factor
|
||||
*
|
||||
* @param[out] pu1_nnz
|
||||
* Total non-zero coefficients in the current sub-block
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvc_resi_trans_quant_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
|
||||
buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
|
||||
resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
|
||||
WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
|
||||
{
|
||||
UWORD32 i;
|
||||
WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
|
||||
WORD32 i4_value;
|
||||
|
||||
UWORD8 *pu1_src = ps_src->pv_data;
|
||||
UWORD8 *pu1_pred = ps_pred->pv_data;
|
||||
WORD16 *pi2_out = ps_out->pv_data;
|
||||
WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
|
||||
WORD32 i4_src_stride = ps_src->i4_data_stride;
|
||||
WORD32 i4_pred_stride = ps_pred->i4_data_stride;
|
||||
WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
|
||||
WORD16 *pi2_out_tmp = pi2_out;
|
||||
UWORD32 u4_nonzero_coeff = 0;
|
||||
const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
|
||||
const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
|
||||
UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
|
||||
UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
|
||||
|
||||
for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
|
||||
{
|
||||
/* computing prediction error (residue) */
|
||||
x4 = pu1_src[0] - pu1_pred[0];
|
||||
x5 = pu1_src[1] - pu1_pred[1];
|
||||
x6 = pu1_src[2] - pu1_pred[2];
|
||||
x7 = pu1_src[3] - pu1_pred[3];
|
||||
|
||||
if(u1_use_upsampled_res)
|
||||
{
|
||||
x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]);
|
||||
x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]);
|
||||
x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]);
|
||||
x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]);
|
||||
}
|
||||
|
||||
/* Horizontal transform */
|
||||
x0 = x4 + x7;
|
||||
x1 = x5 + x6;
|
||||
x2 = x5 - x6;
|
||||
x3 = x4 - x7;
|
||||
|
||||
pi2_out_tmp[0] = x0 + x1;
|
||||
pi2_out_tmp[1] = (x3 << 1) + x2;
|
||||
pi2_out_tmp[2] = x0 - x1;
|
||||
pi2_out_tmp[3] = x3 - (x2 << 1);
|
||||
|
||||
/* pointing to next row; */
|
||||
pu1_src += i4_src_stride;
|
||||
pu1_pred += i4_pred_stride;
|
||||
pi2_out_tmp += 4;
|
||||
pi2_upsampled_res += i4_upsampled_res_stride;
|
||||
}
|
||||
|
||||
pi2_out_tmp = pi2_out;
|
||||
|
||||
for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
|
||||
{
|
||||
/* Vertical transform and quantization */
|
||||
x4 = pi2_out_tmp[0];
|
||||
x5 = pi2_out_tmp[4];
|
||||
x6 = pi2_out_tmp[8];
|
||||
x7 = pi2_out_tmp[12];
|
||||
|
||||
x0 = x4 + x7;
|
||||
x1 = x5 + x6;
|
||||
x2 = x5 - x6;
|
||||
x3 = x4 - x7;
|
||||
|
||||
/* quantization is done in place */
|
||||
|
||||
i4_value = x0 + x1;
|
||||
|
||||
if(i == 0)
|
||||
{
|
||||
(*pi2_dc_out) = i4_value;
|
||||
}
|
||||
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[0] = i4_value;
|
||||
|
||||
i4_value = (x3 << 1) + x2;
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[4] = i4_value;
|
||||
|
||||
i4_value = x0 - x1;
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[8] = i4_value;
|
||||
|
||||
i4_value = x3 - (x2 << 1);
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor,
|
||||
u4_qbits, u4_nonzero_coeff);
|
||||
pi2_out_tmp[12] = i4_value;
|
||||
|
||||
pi2_out_tmp++;
|
||||
pu2_scale_matrix++;
|
||||
pu2_threshold_matrix++;
|
||||
}
|
||||
|
||||
/* Return total nonzero coefficients in the current sub block */
|
||||
*pu1_nnz = u4_nonzero_coeff;
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function performs forward transform and quantization on a 4*4 chroma
|
||||
*block with interleaved values
|
||||
*
|
||||
* @par Description:
|
||||
* The function accepts source buffer and estimation buffer. From these, it
|
||||
* computes the residue. This is residue is then transformed and quantized.
|
||||
* The transform and quantization are in placed computed. They use the residue
|
||||
* buffer for this.
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* Pointer to source sub-block
|
||||
*
|
||||
* @param[in] pu1_pred
|
||||
* Pointer to prediction sub-block
|
||||
*
|
||||
* @param[in] pi2_out
|
||||
* Pointer to residual sub-block
|
||||
*
|
||||
* @param[in] i4_src_stride
|
||||
* Source stride
|
||||
*
|
||||
* @param[in] i4_pred_stride
|
||||
* Prediction stride
|
||||
*
|
||||
* @param[in] dst_strd
|
||||
* Destination stride
|
||||
*
|
||||
* @param[in] u4_qbits
|
||||
* QP_BITS_h264_4x4 + floor(QP/6)
|
||||
*
|
||||
* @param[in] pu2_threshold_matrix
|
||||
* Pointer to Forward Quant Threshold Matrix
|
||||
*
|
||||
* @param[in] pu2_scale_matrix
|
||||
* Pointer to Forward Quant Scale Matrix
|
||||
*
|
||||
* @param[in] u4_round_factor
|
||||
* Quantization Round factor
|
||||
*
|
||||
* @param[out] pu1_nnz
|
||||
* Total non-zero coefficients in the current sub-block
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvc_resi_trans_quant_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
|
||||
buffer_container_t *ps_out,
|
||||
buffer_container_t *ps_upsampled_res,
|
||||
resi_trans_quant_constants_t *ps_quant_constants,
|
||||
UWORD8 *pu1_nnz, WORD16 *pi2_dc_out,
|
||||
UWORD8 u1_use_upsampled_res)
|
||||
{
|
||||
UWORD32 i;
|
||||
WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
|
||||
WORD32 i4_value;
|
||||
|
||||
UWORD8 *pu1_src = ps_src->pv_data;
|
||||
UWORD8 *pu1_pred = ps_pred->pv_data;
|
||||
WORD16 *pi2_out = ps_out->pv_data;
|
||||
WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
|
||||
WORD32 i4_src_stride = ps_src->i4_data_stride;
|
||||
WORD32 i4_pred_stride = ps_pred->i4_data_stride;
|
||||
WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
|
||||
WORD16 *pi2_out_tmp = pi2_out;
|
||||
UWORD32 u4_nonzero_coeff = 0;
|
||||
const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
|
||||
const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
|
||||
UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
|
||||
UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
|
||||
|
||||
for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
|
||||
{
|
||||
/* computing prediction error (residue) */
|
||||
x4 = pu1_src[0] - pu1_pred[0];
|
||||
x5 = pu1_src[2] - pu1_pred[2];
|
||||
x6 = pu1_src[4] - pu1_pred[4];
|
||||
x7 = pu1_src[6] - pu1_pred[6];
|
||||
|
||||
if(u1_use_upsampled_res)
|
||||
{
|
||||
x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]);
|
||||
x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]);
|
||||
x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]);
|
||||
x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]);
|
||||
}
|
||||
|
||||
/* Horizontal transform */
|
||||
x0 = x4 + x7;
|
||||
x1 = x5 + x6;
|
||||
x2 = x5 - x6;
|
||||
x3 = x4 - x7;
|
||||
|
||||
pi2_out_tmp[0] = x0 + x1;
|
||||
pi2_out_tmp[1] = (x3 << 1) + x2;
|
||||
pi2_out_tmp[2] = x0 - x1;
|
||||
pi2_out_tmp[3] = x3 - (x2 << 1);
|
||||
|
||||
/* pointing to next row; */
|
||||
pu1_src += i4_src_stride;
|
||||
pu1_pred += i4_pred_stride;
|
||||
pi2_out_tmp += 4;
|
||||
pi2_upsampled_res += i4_upsampled_res_stride;
|
||||
}
|
||||
pi2_out_tmp = pi2_out;
|
||||
for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
|
||||
{
|
||||
/* Vertical transform and quantization */
|
||||
x4 = pi2_out_tmp[0];
|
||||
x5 = pi2_out_tmp[4];
|
||||
x6 = pi2_out_tmp[8];
|
||||
x7 = pi2_out_tmp[12];
|
||||
|
||||
x0 = x4 + x7;
|
||||
x1 = x5 + x6;
|
||||
x2 = x5 - x6;
|
||||
x3 = x4 - x7;
|
||||
|
||||
/* quantization is done in place */
|
||||
|
||||
i4_value = x0 + x1;
|
||||
|
||||
if(i == 0)
|
||||
{
|
||||
*pi2_dc_out = i4_value;
|
||||
}
|
||||
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[0] = i4_value;
|
||||
|
||||
i4_value = (x3 << 1) + x2;
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[4] = i4_value;
|
||||
|
||||
i4_value = x0 - x1;
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[8] = i4_value;
|
||||
|
||||
i4_value = x3 - (x2 << 1);
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor,
|
||||
u4_qbits, u4_nonzero_coeff);
|
||||
pi2_out_tmp[12] = i4_value;
|
||||
|
||||
pi2_out_tmp++;
|
||||
pu2_scale_matrix++;
|
||||
pu2_threshold_matrix++;
|
||||
}
|
||||
|
||||
/* Return total nonzero coefficients in the current sub block */
|
||||
*pu1_nnz = u4_nonzero_coeff;
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function performs forward hadamard transform and quantization on a 4*4
|
||||
*block
|
||||
*
|
||||
* @par Description:
|
||||
* The function accepts source buffer and estimation buffer. From these, it
|
||||
* computes the residue. This is residue is then transformed and quantized.
|
||||
* The transform and quantization are in placed computed. They use the residue
|
||||
* buffer for this.
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* Pointer to source sub-block
|
||||
*
|
||||
* @param[in] pu1_pred
|
||||
* Pointer to prediction sub-block
|
||||
*
|
||||
* @param[in] pi2_out
|
||||
* Pointer to residual sub-block
|
||||
*
|
||||
* @param[in] i4_src_stride
|
||||
* Source stride
|
||||
*
|
||||
* @param[in] i4_pred_stride
|
||||
* Prediction stride
|
||||
*
|
||||
* @param[in] dst_strd
|
||||
* Destination stride
|
||||
*
|
||||
* @param[in] u4_qbits
|
||||
* QP_BITS_h264_4x4 + floor(QP/6)
|
||||
*
|
||||
* @param[in] pu2_threshold_matrix
|
||||
* Pointer to Forward Quant Threshold Matrix
|
||||
*
|
||||
* @param[in] pu2_scale_matrix
|
||||
* Pointer to Forward Quant Scale Matrix
|
||||
*
|
||||
* @param[in] u4_round_factor
|
||||
* Quantization Round factor
|
||||
*
|
||||
* @param[out] pu1_nnz
|
||||
* Total non-zero coefficients in the current sub-block
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*/
|
||||
|
||||
void isvc_hadamard_quant_4x4(WORD16 *pi2_src, WORD16 *pi2_dst,
|
||||
resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz)
|
||||
{
|
||||
WORD32 i;
|
||||
WORD32 x0, x1, x2, x3, x4, x5, x6, x7, i4_value;
|
||||
|
||||
const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
|
||||
const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
|
||||
UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
|
||||
UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
|
||||
|
||||
*pu1_nnz = 0;
|
||||
|
||||
for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
|
||||
{
|
||||
x4 = pi2_src[0];
|
||||
x5 = pi2_src[1];
|
||||
x6 = pi2_src[2];
|
||||
x7 = pi2_src[3];
|
||||
|
||||
x0 = x4 + x7;
|
||||
x1 = x5 + x6;
|
||||
x2 = x5 - x6;
|
||||
x3 = x4 - x7;
|
||||
|
||||
pi2_dst[0] = x0 + x1;
|
||||
pi2_dst[1] = x3 + x2;
|
||||
pi2_dst[2] = x0 - x1;
|
||||
pi2_dst[3] = x3 - x2;
|
||||
|
||||
pi2_src += 4;
|
||||
pi2_dst += 4;
|
||||
}
|
||||
|
||||
/* Vertical transform and quantization */
|
||||
pi2_dst -= SUB_BLK_WIDTH_4x4 << 2;
|
||||
|
||||
for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
|
||||
{
|
||||
x4 = pi2_dst[0];
|
||||
x5 = pi2_dst[4];
|
||||
x6 = pi2_dst[8];
|
||||
x7 = pi2_dst[12];
|
||||
|
||||
x0 = x4 + x7;
|
||||
x1 = x5 + x6;
|
||||
x2 = x5 - x6;
|
||||
x3 = x4 - x7;
|
||||
|
||||
i4_value = (x0 + x1) >> 1;
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
pu1_nnz[0]);
|
||||
pi2_dst[0] = i4_value;
|
||||
|
||||
i4_value = (x3 + x2) >> 1;
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
pu1_nnz[0]);
|
||||
pi2_dst[4] = i4_value;
|
||||
|
||||
i4_value = (x0 - x1) >> 1;
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
pu1_nnz[0]);
|
||||
pi2_dst[8] = i4_value;
|
||||
|
||||
i4_value = (x3 - x2) >> 1;
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
pu1_nnz[0]);
|
||||
pi2_dst[12] = i4_value;
|
||||
|
||||
pi2_dst++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function performs forward hadamard transform and quantization on a 2*2
|
||||
*block for both U and V planes
|
||||
*
|
||||
* @par Description:
|
||||
* The function accepts source buffer and estimation buffer. From these, it
|
||||
* computes the residue. This is residue is then transformed and quantized.
|
||||
* The transform and quantization are in placed computed. They use the residue
|
||||
* buffer for this.
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* Pointer to source sub-block
|
||||
*
|
||||
* @param[in] pu1_pred
|
||||
* Pointer to prediction sub-block
|
||||
*
|
||||
* @param[in] pi2_out
|
||||
* Pointer to residual sub-block
|
||||
*
|
||||
* @param[in] i4_src_stride
|
||||
* Source stride
|
||||
*
|
||||
* @param[in] i4_pred_stride
|
||||
* Prediction stride
|
||||
*
|
||||
* @param[in] dst_strd
|
||||
* Destination stride
|
||||
*
|
||||
* @param[in] u4_qbits
|
||||
* QP_BITS_h264_4x4 + floor(QP/6)
|
||||
*
|
||||
* @param[in] pu2_threshold_matrix
|
||||
* Pointer to Forward Quant Threshold Matrix
|
||||
*
|
||||
* @param[in] pu2_scale_matrix
|
||||
* Pointer to Forward Quant Scale Matrix
|
||||
*
|
||||
* @param[in] u4_round_factor
|
||||
* Quantization Round factor
|
||||
*
|
||||
* @param[out] pu1_nnz
|
||||
* Total non-zero coefficients in the current sub-block
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* NNZ for dc is populated at 0 and 5th position of pu1_nnz
|
||||
*
|
||||
*/
|
||||
|
||||
void isvc_hadamard_quant_2x2_uv(WORD16 *pi2_src, WORD16 *pi2_dst,
|
||||
resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz)
|
||||
{
|
||||
WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
|
||||
WORD32 i4_value, plane;
|
||||
|
||||
const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
|
||||
const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
|
||||
UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
|
||||
UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
|
||||
|
||||
for(plane = 0; plane < 2; plane++)
|
||||
{
|
||||
pu1_nnz[plane] = 0;
|
||||
|
||||
/* Horizontal transform */
|
||||
x4 = pi2_src[0];
|
||||
x5 = pi2_src[1];
|
||||
x6 = pi2_src[2];
|
||||
x7 = pi2_src[3];
|
||||
|
||||
x0 = x4 + x5;
|
||||
x1 = x4 - x5;
|
||||
x2 = x6 + x7;
|
||||
x3 = x6 - x7;
|
||||
|
||||
/* Vertical transform and quantization */
|
||||
i4_value = (x0 + x2);
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
pu1_nnz[plane]);
|
||||
pi2_dst[0] = i4_value;
|
||||
|
||||
i4_value = (x0 - x2);
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
pu1_nnz[plane]);
|
||||
pi2_dst[2] = i4_value;
|
||||
|
||||
i4_value = (x1 - x3);
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
pu1_nnz[plane]);
|
||||
pi2_dst[3] = i4_value;
|
||||
|
||||
i4_value = (x1 + x3);
|
||||
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
pu1_nnz[plane]);
|
||||
pi2_dst[1] = i4_value;
|
||||
|
||||
pi2_dst += 4;
|
||||
pi2_src += 4;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function performs Single stage forward transform CF8 and quantization
|
||||
*on 8*8 blocks for h.264
|
||||
*
|
||||
* @par Description:
|
||||
* Performs single stage 8x8 forward transform CF8 after calculating the
|
||||
*residue The result is then quantized
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* Input 8x8 pixels
|
||||
*
|
||||
* @param[in] pu1_pred
|
||||
* Input 8x8 pixels
|
||||
*
|
||||
* @param[in] pi1_out
|
||||
* Output 8x8 pixels
|
||||
*
|
||||
* @param[in] u4_thresh
|
||||
* Threshold under which the coeffs are not quantized
|
||||
*
|
||||
* @param[in] u4_qp_div
|
||||
* QP/6
|
||||
*
|
||||
* @param[in] u4_qp_rem
|
||||
* QP%6
|
||||
*
|
||||
* @param[in] u2_src_stride
|
||||
* Source stride
|
||||
*
|
||||
* @param[in] i4_pred_stride
|
||||
* stride for prediciton buffer
|
||||
*
|
||||
* @param[in] dst_strd
|
||||
* stride for destination buffer
|
||||
*
|
||||
* @param[in] pu4_quant_mat
|
||||
* Pointer to the 4x4 quantization matrix
|
||||
*
|
||||
* @returns Void
|
||||
*
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvc_resi_trans_quant_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred,
|
||||
buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
|
||||
resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
|
||||
WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
|
||||
{
|
||||
UWORD32 i;
|
||||
WORD32 a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
WORD32 r0, r1, r2, r3, r4, r5, r6, r7;
|
||||
|
||||
UWORD8 *pu1_src = ps_src->pv_data;
|
||||
UWORD8 *pu1_pred = ps_pred->pv_data;
|
||||
WORD16 *pi2_out = ps_out->pv_data;
|
||||
WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
|
||||
WORD32 i4_src_stride = ps_src->i4_data_stride;
|
||||
WORD32 i4_pred_stride = ps_pred->i4_data_stride;
|
||||
WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
|
||||
WORD16 *pi2_out_tmp = pi2_out;
|
||||
UWORD32 u4_nonzero_coeff = 0;
|
||||
const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
|
||||
const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
|
||||
UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
|
||||
UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
|
||||
|
||||
UNUSED(pi2_dc_out);
|
||||
|
||||
/*Horizontal transform */
|
||||
/* we are going to use the a's and r's in a twisted way since */
|
||||
/*i dont want to declare more variables */
|
||||
for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
|
||||
{
|
||||
r0 = pu1_src[0];
|
||||
r0 -= pu1_pred[0];
|
||||
r1 = pu1_src[1];
|
||||
r1 -= pu1_pred[1];
|
||||
r2 = pu1_src[2];
|
||||
r2 -= pu1_pred[2];
|
||||
r3 = pu1_src[3];
|
||||
r3 -= pu1_pred[3];
|
||||
r4 = pu1_src[4];
|
||||
r4 -= pu1_pred[4];
|
||||
r5 = pu1_src[5];
|
||||
r5 -= pu1_pred[5];
|
||||
r6 = pu1_src[6];
|
||||
r6 -= pu1_pred[6];
|
||||
r7 = pu1_src[7];
|
||||
r7 -= pu1_pred[7];
|
||||
|
||||
if(u1_use_upsampled_res)
|
||||
{
|
||||
r0 = isvc_subtract_upsampled_res(r0, pi2_upsampled_res[0]);
|
||||
r1 = isvc_subtract_upsampled_res(r1, pi2_upsampled_res[1]);
|
||||
r2 = isvc_subtract_upsampled_res(r2, pi2_upsampled_res[2]);
|
||||
r3 = isvc_subtract_upsampled_res(r3, pi2_upsampled_res[3]);
|
||||
r4 = isvc_subtract_upsampled_res(r4, pi2_upsampled_res[4]);
|
||||
r5 = isvc_subtract_upsampled_res(r5, pi2_upsampled_res[5]);
|
||||
r6 = isvc_subtract_upsampled_res(r6, pi2_upsampled_res[6]);
|
||||
r7 = isvc_subtract_upsampled_res(r7, pi2_upsampled_res[7]);
|
||||
}
|
||||
|
||||
a0 = r0 + r7;
|
||||
a1 = r1 + r6;
|
||||
a2 = r2 + r5;
|
||||
a3 = r3 + r4;
|
||||
|
||||
a4 = a0 + a3;
|
||||
a5 = a1 + a2;
|
||||
a6 = a0 - a3;
|
||||
a7 = a1 - a2;
|
||||
|
||||
pi2_out_tmp[0] = a4 + a5;
|
||||
|
||||
pi2_out_tmp[2] = a6 + (a7 >> 1);
|
||||
pi2_out_tmp[4] = a4 - a5;
|
||||
pi2_out_tmp[6] = (a6 >> 1) - a7;
|
||||
|
||||
a0 = r0 - r7;
|
||||
a1 = r1 - r6;
|
||||
a2 = r2 - r5;
|
||||
a3 = r3 - r4;
|
||||
|
||||
a4 = a1 + a2 + ((a0 >> 1) + a0);
|
||||
a5 = a0 - a3 - ((a2 >> 1) + a2);
|
||||
a6 = a0 + a3 - ((a1 >> 1) + a1);
|
||||
a7 = a1 - a2 + ((a3 >> 1) + a3);
|
||||
|
||||
pi2_out_tmp[1] = a4 + (a7 >> 2);
|
||||
pi2_out_tmp[3] = a5 + (a6 >> 2);
|
||||
pi2_out_tmp[5] = a6 - (a5 >> 2);
|
||||
pi2_out_tmp[7] = (a4 >> 2) - a7;
|
||||
|
||||
pu1_src += i4_src_stride;
|
||||
pu1_pred += i4_pred_stride;
|
||||
pi2_out_tmp += 8;
|
||||
pi2_upsampled_res += i4_upsampled_res_stride;
|
||||
}
|
||||
|
||||
/*vertical transform and quant */
|
||||
|
||||
pi2_out_tmp = pi2_out;
|
||||
|
||||
for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
|
||||
{
|
||||
r0 = pi2_out_tmp[0];
|
||||
r1 = pi2_out_tmp[8];
|
||||
r2 = pi2_out_tmp[16];
|
||||
r3 = pi2_out_tmp[24];
|
||||
r4 = pi2_out_tmp[32];
|
||||
r5 = pi2_out_tmp[40];
|
||||
r6 = pi2_out_tmp[48];
|
||||
r7 = pi2_out_tmp[56];
|
||||
|
||||
a0 = r0 + r7;
|
||||
a1 = r1 + r6;
|
||||
a2 = r2 + r5;
|
||||
a3 = r3 + r4;
|
||||
|
||||
a4 = a0 + a3;
|
||||
a5 = a1 + a2;
|
||||
a6 = a0 - a3;
|
||||
a7 = a1 - a2;
|
||||
|
||||
a0 = r0 - r7;
|
||||
a1 = r1 - r6;
|
||||
a2 = r2 - r5;
|
||||
a3 = r3 - r4;
|
||||
|
||||
r0 = a4 + a5;
|
||||
r2 = a6 + (a7 >> 1);
|
||||
r4 = a4 - a5;
|
||||
r6 = (a6 >> 1) - a7;
|
||||
|
||||
a4 = a1 + a2 + ((a0 >> 1) + a0);
|
||||
a5 = a0 - a3 - ((a2 >> 1) + a2);
|
||||
a6 = a0 + a3 - ((a1 >> 1) + a1);
|
||||
a7 = a1 - a2 + ((a3 >> 1) + a3);
|
||||
|
||||
r1 = a4 + (a7 >> 2);
|
||||
r3 = a5 + (a6 >> 2);
|
||||
r5 = a6 - (a5 >> 2);
|
||||
r7 = (a4 >> 2) - a7;
|
||||
|
||||
FWD_QUANT(r0, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[0] = r0;
|
||||
|
||||
FWD_QUANT(r1, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[8] = r1;
|
||||
|
||||
FWD_QUANT(r2, pu2_threshold_matrix[16], pu2_scale_matrix[16], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[16] = r2;
|
||||
|
||||
FWD_QUANT(r3, pu2_threshold_matrix[24], pu2_scale_matrix[24], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[24] = r3;
|
||||
|
||||
FWD_QUANT(r4, pu2_threshold_matrix[32], pu2_scale_matrix[32], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[32] = r4;
|
||||
|
||||
FWD_QUANT(r5, pu2_threshold_matrix[40], pu2_scale_matrix[40], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[40] = r5;
|
||||
|
||||
FWD_QUANT(r6, pu2_threshold_matrix[48], pu2_scale_matrix[48], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[48] = r6;
|
||||
|
||||
FWD_QUANT(r7, pu2_threshold_matrix[56], pu2_scale_matrix[56], u4_round_factor, u4_qbits,
|
||||
u4_nonzero_coeff);
|
||||
pi2_out_tmp[56] = r7;
|
||||
|
||||
pi2_out_tmp++;
|
||||
pu2_scale_matrix++;
|
||||
pu2_threshold_matrix++;
|
||||
}
|
||||
/* Return total nonzero coefficients in the current sub block */
|
||||
*pu1_nnz = u4_nonzero_coeff;
|
||||
}
|
||||
335
common/svc/isvc_structs.h
Normal file
335
common/svc/isvc_structs.h
Normal file
|
|
@ -0,0 +1,335 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_structs.h
|
||||
*
|
||||
* @brief
|
||||
* Contains struct definition used for SVC
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVC_STRUCTS_H_
|
||||
#define _ISVC_STRUCTS_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "ih264_defs.h"
|
||||
#include "ih264_structs.h"
|
||||
#include "isvc_defs.h"
|
||||
|
||||
typedef struct buffer_container_t
|
||||
{
|
||||
void *pv_data;
|
||||
|
||||
WORD32 i4_data_stride;
|
||||
|
||||
} buffer_container_t;
|
||||
|
||||
typedef struct yuv_buf_props_t
|
||||
{
|
||||
buffer_container_t as_component_bufs[NUM_COMPONENTS];
|
||||
|
||||
IV_COLOR_FORMAT_T e_color_format;
|
||||
|
||||
UWORD32 u4_width;
|
||||
|
||||
UWORD32 u4_height;
|
||||
|
||||
UWORD8 u1_bit_depth;
|
||||
} yuv_buf_props_t;
|
||||
|
||||
typedef struct nal_unit_header_t
|
||||
{
|
||||
UWORD8 u1_nal_ref_idc;
|
||||
|
||||
UWORD8 u1_nal_unit_type;
|
||||
} nal_unit_header_t;
|
||||
|
||||
typedef struct coordinates_t
|
||||
{
|
||||
WORD32 i4_abscissa;
|
||||
|
||||
WORD32 i4_ordinate;
|
||||
} coordinates_t;
|
||||
|
||||
typedef struct svc_au_buf_t
|
||||
{
|
||||
/* Array of structs that contain properties of the buffers used for storing */
|
||||
yuv_buf_props_t *ps_layer_yuv_buf_props;
|
||||
|
||||
/* Temporal ID */
|
||||
WORD8 i1_temporal_id;
|
||||
|
||||
/* Num Spatial Layers */
|
||||
UWORD8 u1_num_spatial_layers;
|
||||
|
||||
/* Resolution ration b/w spatial layers */
|
||||
DOUBLE d_spatial_res_ratio;
|
||||
|
||||
/* absolute value of POC */
|
||||
WORD32 i4_abs_poc;
|
||||
|
||||
/* POC % MaxPicOrderCntLSB */
|
||||
WORD32 i4_poc_lsb;
|
||||
|
||||
/* Lower 32 bits of time stamp */
|
||||
UWORD32 u4_timestamp_low;
|
||||
|
||||
/* Higher 32 bits of time stamp */
|
||||
UWORD32 u4_timestamp_high;
|
||||
|
||||
/* Is Pic used as refPic for future frames? */
|
||||
WORD32 i4_used_as_ref;
|
||||
|
||||
/* frame_num in the slice header */
|
||||
WORD32 i4_frame_num;
|
||||
|
||||
/*
|
||||
* 0: Top Field
|
||||
* 1: Bottom Field
|
||||
*/
|
||||
WORD8 i1_field_type;
|
||||
|
||||
/* buffer ID from frame buffer manager */
|
||||
WORD32 i4_buf_id;
|
||||
|
||||
} svc_au_buf_t;
|
||||
|
||||
typedef struct svc_nalu_ext_t
|
||||
{
|
||||
nal_unit_header_t s_nalu_header;
|
||||
|
||||
/* idr_flag */
|
||||
UWORD8 u1_idr_flag;
|
||||
|
||||
/* priority_id (Range = [0, 63]) */
|
||||
UWORD8 u1_priority_id;
|
||||
|
||||
/* no_inter_layer_pred_flag */
|
||||
UWORD8 u1_no_inter_layer_pred_flag;
|
||||
|
||||
/* dependency_id (Range = [0, 7]) */
|
||||
UWORD8 u1_dependency_id;
|
||||
|
||||
/* quality_id (Range = [0, 15]) */
|
||||
UWORD8 u1_quality_id;
|
||||
|
||||
/* temporal_id (Range = [0, 7]) */
|
||||
UWORD8 u1_temporal_id;
|
||||
|
||||
/* use_ref_base_pic_flag */
|
||||
UWORD8 u1_use_ref_base_pic_flag;
|
||||
|
||||
/* discardable_flag */
|
||||
UWORD8 u1_discardable_flag;
|
||||
|
||||
/* output_flag */
|
||||
UWORD8 u1_output_flag;
|
||||
|
||||
/* reserved_three_2bits */
|
||||
UWORD8 u1_reserved_three_2bits;
|
||||
|
||||
} svc_nalu_ext_t;
|
||||
|
||||
typedef struct svc_vui_ext_t
|
||||
{
|
||||
/* specifies the maximum layers in the SVC bitstream */
|
||||
UWORD32 u4_vui_ext_num_entries_minus1;
|
||||
|
||||
/* specifies the dependency ID for each layer */
|
||||
UWORD8 u1_vui_ext_dependency_id[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
/* specifies the quality ID for each layer */
|
||||
UWORD8 u1_vui_ext_quality_id[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
/* specifies the temporal ID for each layer */
|
||||
UWORD8 u1_vui_ext_temporal_id[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
/* specifies the timing_info_present_flag value of the i-th sub-bitstream */
|
||||
UWORD8 u1_vui_ext_timing_info_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
/* specifies the num_units_in_tick value of the i-th sub-bitstream */
|
||||
UWORD32 u4_vui_ext_num_units_in_tick[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
/* specifies the time_scale value of the i-th sub-bitstream */
|
||||
UWORD32 u4_vui_ext_time_scale[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
/* specifies the fixed_frame_rate_flag value of the i-th sub-bitstream */
|
||||
UWORD8 u1_vui_ext_fixed_frame_rate_flag[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
/* specifies the nal_hrd_parameters_present_flag value of the i-th */
|
||||
UWORD8 u1_vui_ext_nal_hrd_params_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
/* specifies the vcl_hrd_parameters_present_flag value of the i-th */
|
||||
UWORD8 u1_vui_ext_vcl_hrd_params_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
/* specifies the low_delay_hrd_flag value of the i-th sub-bitstream */
|
||||
UWORD8 u1_vui_ext_low_delay_hrd_flag[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
/* specifies the pic_struct_present_flag value of the i-th sub-bitstream */
|
||||
UWORD8 u1_vui_ext_pic_struct_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
|
||||
|
||||
} svc_vui_ext_t;
|
||||
|
||||
typedef struct sps_svc_ext_t
|
||||
{
|
||||
/* inter_layer_deblocking_filter_control_present_flag */
|
||||
UWORD8 u1_inter_layer_deblocking_filter_control_present_flag;
|
||||
|
||||
/* extended_spatial_scalability_idc */
|
||||
UWORD8 u1_extended_spatial_scalability_idc;
|
||||
|
||||
/* chroma_phase_x_plus1_flag */
|
||||
UWORD8 u1_chroma_phase_x_plus1;
|
||||
|
||||
/* chroma_phase_y_plus1 */
|
||||
UWORD8 u1_chroma_phase_y_plus1;
|
||||
|
||||
/* seq_ref_layer_chroma_phase_x_plus1_flag */
|
||||
UWORD8 u1_seq_ref_layer_chroma_phase_x_plus1_flag;
|
||||
|
||||
/* seq_ref_layer_chroma_phase_y_plus1 */
|
||||
UWORD8 u1_seq_ref_layer_chroma_phase_y_plus1;
|
||||
|
||||
/* seq_scaled_ref_layer_left_offset */
|
||||
WORD32 i4_seq_scaled_ref_layer_left_offset;
|
||||
|
||||
/* seq_scaled_ref_layer_top_offset */
|
||||
WORD32 i4_seq_scaled_ref_layer_top_offset;
|
||||
|
||||
/* seq_scaled_ref_layer_right_offset */
|
||||
WORD32 i4_seq_scaled_ref_layer_right_offset;
|
||||
|
||||
/* seq_scaled_ref_layer_bottom_offset */
|
||||
WORD32 i4_seq_scaled_ref_layer_bottom_offset;
|
||||
|
||||
/* seq_tcoeff_level_prediction_flag */
|
||||
WORD8 i1_seq_tcoeff_level_prediction_flag;
|
||||
|
||||
/* adaptive_tcoeff_level_prediction_flag */
|
||||
WORD8 i1_adaptive_tcoeff_level_prediction_flag;
|
||||
|
||||
/* slice_header_restriction_flag */
|
||||
WORD8 i1_slice_header_restriction_flag;
|
||||
|
||||
} sps_svc_ext_t;
|
||||
|
||||
typedef struct subset_sps_t
|
||||
{
|
||||
/* SPS structure */
|
||||
sps_t s_sps;
|
||||
|
||||
/* Structure containing flags specific to SVC SPS */
|
||||
sps_svc_ext_t s_sps_svc_ext;
|
||||
|
||||
/* svc_vui_parameters_present_flag */
|
||||
WORD8 i1_svc_vui_parameters_present_flag;
|
||||
|
||||
svc_vui_ext_t s_svc_vui;
|
||||
|
||||
/* additional_extension2_data_flag */
|
||||
WORD8 i1_additional_extension2_flag;
|
||||
|
||||
} subset_sps_t;
|
||||
|
||||
typedef struct svc_slice_header_t
|
||||
{
|
||||
/* ref_layer_dq_id */
|
||||
UWORD32 u4_ref_layer_dq_id;
|
||||
|
||||
/* disable_inter_layer_deblocking_filter_idc */
|
||||
UWORD32 u4_disable_inter_layer_deblocking_filter_idc;
|
||||
|
||||
/* inter_layer_slice_alpha_c0_offset_div2 */
|
||||
WORD32 i4_inter_layer_slice_alpha_c0_offset_div2;
|
||||
|
||||
/* inter_layer_slice_beta_offset_div2 */
|
||||
WORD32 i4_inter_layer_slice_beta_offset_div2;
|
||||
|
||||
/* constrained_intra_resampling_flag */
|
||||
WORD8 i1_constrained_intra_resampling_flag;
|
||||
|
||||
/* ref_layer_chroma_phase_x_plus1_flag */
|
||||
WORD8 i1_ref_layer_chroma_phase_x_plus1_flag;
|
||||
|
||||
/* ref_layer_chroma_phase_y_plus1 */
|
||||
WORD8 i1_ref_layer_chroma_phase_y_plus1;
|
||||
|
||||
/* scaled_ref_layer_left_offset */
|
||||
WORD32 i4_scaled_ref_layer_left;
|
||||
|
||||
/* scaled_ref_layer_top_offset */
|
||||
WORD32 i4_scaled_ref_layer_top;
|
||||
|
||||
/* scaled_ref_layer_right_offset */
|
||||
WORD32 i4_scaled_ref_layer_right;
|
||||
|
||||
/* scaled_ref_layer_bottom_offset */
|
||||
WORD32 i4_scaled_ref_layer_bottom;
|
||||
|
||||
/* slice_skip_flag */
|
||||
WORD8 i1_slice_skip_flag;
|
||||
|
||||
/* num_mbs_in_slice_minus1 */
|
||||
UWORD32 u4_num_mbs_in_slice_minus1;
|
||||
|
||||
/* adaptive_base_mode_flag */
|
||||
WORD8 i1_adaptive_base_mode_flag;
|
||||
|
||||
/* default_base_mode_flag */
|
||||
WORD8 i1_default_base_mode_flag;
|
||||
|
||||
/* adaptive_motion_prediction_flag */
|
||||
WORD8 i1_adaptive_motion_prediction_flag;
|
||||
|
||||
/* default_motion_prediction_flag */
|
||||
WORD8 i1_default_motion_prediction_flag;
|
||||
|
||||
/* adaptive_residual_prediction_flag */
|
||||
WORD8 i1_adaptive_residual_prediction_flag;
|
||||
|
||||
/* default_residual_prediction_flag */
|
||||
WORD8 i1_default_residual_prediction_flag;
|
||||
|
||||
/* tcoeff_level_prediction_flag */
|
||||
WORD8 i1_tcoeff_level_prediction_flag;
|
||||
|
||||
/* scan_idx_start */
|
||||
UWORD32 u4_scan_idx_start;
|
||||
|
||||
/* scan_idx_end */
|
||||
UWORD32 u4_scan_idx_end;
|
||||
|
||||
WORD32 i4_store_ref_base_pic_flag;
|
||||
|
||||
slice_header_t s_slice_header;
|
||||
} svc_slice_header_t;
|
||||
|
||||
#endif
|
||||
253
common/svc/isvc_trans_quant_itrans_iquant.h
Normal file
253
common/svc/isvc_trans_quant_itrans_iquant.h
Normal file
|
|
@ -0,0 +1,253 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_trans_quant_itrans_iquant.h
|
||||
*
|
||||
* @brief
|
||||
* Contains declarations for forward and inverse transform paths for H264
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVC_TRANS_QUANT_ITRANS_IQUANT_H_
|
||||
#define _ISVC_TRANS_QUANT_ITRANS_IQUANT_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "ih264_macros.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "isvc_structs.h"
|
||||
|
||||
/* With and without residual_pred use */
|
||||
#define NUM_RESI_TRANS_QUANT_VARIANTS 2
|
||||
|
||||
#define NUM_IQ_IT_RECON_VARIANTS 3
|
||||
|
||||
/* Structs */
|
||||
typedef struct resi_trans_quant_constants_t
|
||||
{
|
||||
const UWORD16 *pu2_scale_matrix;
|
||||
|
||||
const UWORD16 *pu2_threshold_matrix;
|
||||
|
||||
UWORD32 u4_qbits;
|
||||
|
||||
UWORD32 u4_round_factor;
|
||||
} resi_trans_quant_constants_t;
|
||||
|
||||
typedef struct iq_it_res_rec_constants_t
|
||||
{
|
||||
const UWORD16 *pu2_iscal_mat;
|
||||
|
||||
const UWORD16 *pu2_weigh_mat;
|
||||
|
||||
UWORD32 u4_qp_div_6;
|
||||
} iq_it_res_rec_constants_t;
|
||||
|
||||
/* Typedefs */
|
||||
typedef void FT_RESI_TRANS_DCTRANS_QUANT(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out,
|
||||
WORD32 src_strd, WORD32 pred_strd, WORD32 dst_strd,
|
||||
const UWORD16 *pu2_scale_mat,
|
||||
const UWORD16 *pu2_thresh_mat, UWORD32 u4_qbit,
|
||||
UWORD32 u4_round_fact, UWORD8 *pu1_nnz);
|
||||
|
||||
typedef void FT_IDCTRANS_IQUANT_ITRANS_RECON(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out,
|
||||
WORD32 src_strd, WORD32 pred_strd, WORD32 out_strd,
|
||||
const UWORD16 *pu2_iscale_mat,
|
||||
const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
|
||||
UWORD32 pi4_cntrl, WORD32 *pi4_tmp);
|
||||
|
||||
typedef void FT_RESI_TRANS_QUANT(buffer_container_t *ps_src, buffer_container_t *ps_pred,
|
||||
buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
|
||||
resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
|
||||
WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res);
|
||||
|
||||
typedef void FT_LUMA_16X16_RESI_TRANS_DCTRANS_QUANT(
|
||||
UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
|
||||
WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix,
|
||||
UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag);
|
||||
|
||||
typedef void FT_CHROMA_8X8_RESI_TRANS_DCTRANS_QUANT(
|
||||
UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
|
||||
WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix,
|
||||
UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 *pu1_nnz);
|
||||
|
||||
typedef void FT_IQ_IT_RECON(buffer_container_t *ps_src, buffer_container_t *ps_pred,
|
||||
buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
|
||||
buffer_container_t *ps_rec,
|
||||
iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp,
|
||||
WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate);
|
||||
|
||||
typedef void FT_LUMA_16X16_IDCTRANS_IQUANT_ITRANS_RECON(
|
||||
WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
|
||||
WORD32 out_strd, const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
|
||||
UWORD32 pi4_cntrl, UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp);
|
||||
|
||||
typedef void FT_CHROMA_8X8_IDCTRANS_IQUANT_ITRANS_RECON(
|
||||
WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
|
||||
WORD32 out_strd, const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
|
||||
UWORD32 pi4_cntrl, WORD32 *pi4_tmp);
|
||||
|
||||
typedef void FT_IHADAMARD_SCALING(WORD16 *pi2_src, WORD16 *pi2_out, const UWORD16 *pu2_iscal_mat,
|
||||
const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6,
|
||||
WORD32 *pi4_tmp);
|
||||
|
||||
typedef void FT_HADAMARD_QUANT(WORD16 *pi2_src, WORD16 *pi2_dst,
|
||||
resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz);
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Extern Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4;
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4;
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_8x8;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc;
|
||||
extern FT_IQ_IT_RECON isvc_zcbf_iquant_itrans_recon_4x4;
|
||||
extern FT_IQ_IT_RECON isvc_chroma_zcbf_iquant_itrans_recon_4x4;
|
||||
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4;
|
||||
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv;
|
||||
extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4;
|
||||
extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv;
|
||||
|
||||
/* A9 Declarations */
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_a9;
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_a9;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_a9;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_a9;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_a9;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_a9;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_a9;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_a9;
|
||||
extern FT_LUMA_16X16_RESI_TRANS_DCTRANS_QUANT isvc_luma_16x16_resi_trans_dctrans_quant_a9;
|
||||
extern FT_CHROMA_8X8_RESI_TRANS_DCTRANS_QUANT isvc_chroma_8x8_resi_trans_dctrans_quant_a9;
|
||||
extern FT_LUMA_16X16_IDCTRANS_IQUANT_ITRANS_RECON isvc_luma_16x16_idctrans_iquant_itrans_recon_a9;
|
||||
extern FT_CHROMA_8X8_IDCTRANS_IQUANT_ITRANS_RECON isvc_chroma_8x8_idctrans_iquant_itrans_recon_a9;
|
||||
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_a9;
|
||||
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_a9;
|
||||
extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4_a9;
|
||||
extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv_a9;
|
||||
|
||||
/* Av8 Declarations */
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_av8;
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_av8;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_av8;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_av8;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_av8;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_av8;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_av8;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_av8;
|
||||
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_av8;
|
||||
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_av8;
|
||||
|
||||
/* NEON Declarations */
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_neon;
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_with_residual_sub_neon;
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_neon;
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;
|
||||
|
||||
/* SSSE3 Declarations */
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_ssse3;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_ssse3;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_ssse3;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_ssse3;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3;
|
||||
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_ssse3;
|
||||
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_ssse3;
|
||||
|
||||
/* SSE42 Declarations */
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_sse42;
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_with_res_pred_sse42;
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_sse42;
|
||||
extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_with_res_pred_sse42;
|
||||
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_sse42;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_4x4_sse42;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_4x4_with_res_acc_sse42;
|
||||
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_sse42;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_sse42;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_with_res_acc_sse42;
|
||||
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_dc_4x4_sse42;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_dc_4x4_sse42;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42;
|
||||
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_sse42;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_dc_sse42;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_dc_with_res_acc_sse42;
|
||||
|
||||
extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_sse42;
|
||||
|
||||
extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4_sse42;
|
||||
extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv_sse42;
|
||||
|
||||
/* NEON Declarations (iquant, itrans and recon) */
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_neon;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_with_res_output_neon;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
|
||||
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_neon;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
|
||||
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_neon;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
|
||||
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
|
||||
extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
|
||||
|
||||
/**
 * Returns the variant index that corresponds to the given
 * 'u1_use_upsampled_res' flag. The mapping is the identity: the flag value
 * is returned unchanged.
 *
 * NOTE(review): presumably used to index tables holding
 * NUM_RESI_TRANS_QUANT_VARIANTS entries - confirm against the callers.
 */
static FORCEINLINE UWORD8 isvc_get_resi_trans_quant_variant_idx(UWORD8 u1_use_upsampled_res)
{
    const UWORD8 u1_variant_idx = u1_use_upsampled_res;

    return u1_variant_idx;
}
|
||||
|
||||
/**
 * Returns the variant index for an (intra, residual-accumulate) flag pair,
 * computed as 2 * u1_is_intra + u1_res_accumulate.
 *
 * The combination intra = 1 together with res_accumulate = 1 is rejected by
 * the ASSERT, so for 0/1 flag values the result is 0, 1 or 2.
 *
 * NOTE(review): presumably used to index tables holding
 * NUM_IQ_IT_RECON_VARIANTS entries - confirm against the callers.
 */
static FORCEINLINE UWORD8 isvc_get_iq_it_recon_variant_idx(UWORD8 u1_is_intra,
                                                           UWORD8 u1_res_accumulate)
{
    /* Doubling expressed as a shift; the operand is promoted to int first */
    const WORD32 i4_intra_offset = u1_is_intra << 1;

    ASSERT(!((1 == u1_is_intra) && (1 == u1_res_accumulate)));

    return (UWORD8) (i4_intra_offset + u1_res_accumulate);
}
|
||||
|
||||
/**
 * Returns the residue for one sample, clipped to [-UINT8_MAX, UINT8_MAX].
 *
 * @param i2_it_out          inverse-transform output sample
 * @param i2_res_pred        residual prediction for the same sample
 * @param u1_res_accumulate  when non-zero, i2_res_pred is added to i2_it_out
 *                           before clipping; when zero, i2_res_pred is ignored
 */
static FORCEINLINE WORD16 isvc_get_residue(WORD16 i2_it_out, WORD16 i2_res_pred,
                                           UWORD8 u1_res_accumulate)
{
    /* Accumulate in 32 bits so the sum of two 16-bit values cannot wrap */
    WORD32 i4_residue = i2_it_out;

    if(u1_res_accumulate)
    {
        i4_residue += i2_res_pred;
    }

    return CLIP3(-((WORD16) UINT8_MAX), ((WORD16) UINT8_MAX), i4_residue);
}
|
||||
|
||||
#endif
|
||||
39
common/svccommon.cmake
Normal file
39
common/svccommon.cmake
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# Platform-independent SVC sources, appended to the common source list.
include_directories(${AVC_ROOT}/common/svc)

list(APPEND LIBAVC_COMMON_SRCS
     "${AVC_ROOT}/common/svc/isvc_common_tables.c"
     "${AVC_ROOT}/common/svc/isvc_cabac_tables.c"
     "${AVC_ROOT}/common/svc/isvc_intra_resample.c"
     "${AVC_ROOT}/common/svc/isvc_iquant_itrans_recon.c"
     "${AVC_ROOT}/common/svc/isvc_mem_fns.c"
     "${AVC_ROOT}/common/svc/isvc_resi_trans_quant.c")

# Architecture-specific SVC kernels.
# "aarch64" / "aarch32" select the NEON sources; note that they are appended
# to LIBAVC_COMMON_ASMS although they are .c files.
# Every other CMAKE_SYSTEM_PROCESSOR value falls through to the x86
# SSSE3 / SSE4.2 sources.
if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64"
   OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch32")
  include_directories(${AVC_ROOT}/common/arm/svc)

  list(APPEND LIBAVC_COMMON_ASMS
       "${AVC_ROOT}/common/arm/svc/isvc_intra_sampling_neon.c"
       "${AVC_ROOT}/common/arm/svc/isvc_iquant_itrans_recon_neon.c"
       "${AVC_ROOT}/common/arm/svc/isvc_mem_fns_neon.c"
       "${AVC_ROOT}/common/arm/svc/isvc_resi_trans_quant_neon.c")
else()
  include_directories(${AVC_ROOT}/common/x86/svc)

  list(APPEND LIBAVC_COMMON_SRCS
       "${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c"
       "${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_sse42.c"
       "${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_ssse3.c"
       "${AVC_ROOT}/common/x86/svc/isvc_mem_fns_sse42.c"
       "${AVC_ROOT}/common/x86/svc/isvc_mem_fns_ssse3.c"
       "${AVC_ROOT}/common/x86/svc/isvc_padding_ssse3.c"
       "${AVC_ROOT}/common/x86/svc/isvc_resi_trans_quant_sse42.c"
       "${AVC_ROOT}/common/x86/svc/isvc_intra_resample_sse42.c")
endif()
|
||||
658
common/x86/svc/isvc_intra_resample_sse42.c
Normal file
658
common/x86/svc/isvc_intra_resample_sse42.c
Normal file
|
|
@ -0,0 +1,658 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/*!
|
||||
**************************************************************************
|
||||
|
||||
* * \file isvc_intra_resample_sse42.c
|
||||
*
|
||||
* \brief
|
||||
* Contains routines that
|
||||
* resample for SVC resampling
|
||||
*
|
||||
* Detailed_description
|
||||
*
|
||||
* \date
|
||||
*
|
||||
*
|
||||
*
|
||||
* \author
|
||||
|
||||
* **************************************************************************
|
||||
|
||||
*/
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "isvc_intra_resample.h"
|
||||
|
||||
/*
 * Stores one vertically filtered row into the intermediate buffer.
 * The first column pass writes all eight 16-bit lanes; the second pass writes
 * only the low four lanes.
 */
static void isvc_luma_dyadic_store_tmp_row_sse42(WORD16 *pi2_dst, __m128i i2_row_8x16b,
                                                 WORD32 i4_is_first_pass)
{
    if(i4_is_first_pass)
    {
        _mm_storeu_si128((__m128i *) pi2_dst, i2_row_8x16b);
    }
    else
    {
        _mm_storel_epi64((__m128i *) pi2_dst, i2_row_8x16b);
    }
}

/*
 * Horizontal pass for one half of an output row.
 *
 * Takes eight consecutive intermediate samples t[0..7] and produces eight
 * output samples (packed to unsigned 16 bit, each already rounded by
 * (x + 512) >> 10):
 *   out[2i]     = -1*t[i]   +  8*t[i+1] + 28*t[i+2] - 3*t[i+3]
 *   out[2i + 1] = -3*t[i+1] + 28*t[i+2] +  8*t[i+3] - 1*t[i+4]     i = 0..3
 */
static __m128i isvc_luma_dyadic_horz_half_row_sse42(__m128i i2_samp_8x16b)
{
    /* Coefficient pairs laid out for _mm_madd_epi16 (low lane listed last) */
    const __m128i coeff_m3_28 = _mm_set_epi16(28, -3, 28, -3, 28, -3, 28, -3);
    const __m128i coeff_8_m1 = _mm_set_epi16(-1, 8, -1, 8, -1, 8, -1, 8);
    const __m128i coeff_m1_8 = _mm_set_epi16(8, -1, 8, -1, 8, -1, 8, -1);
    const __m128i coeff_28_m3 = _mm_set_epi16(-3, 28, -3, 28, -3, 28, -3, 28);
    const __m128i round_512 = _mm_set1_epi32(512);

    /* Copies of the input shifted down by 1, 2, 3 and 4 samples */
    const __m128i shift_1 = _mm_srli_si128(i2_samp_8x16b, 2);
    const __m128i shift_2 = _mm_srli_si128(i2_samp_8x16b, 4);
    const __m128i shift_3 = _mm_srli_si128(i2_samp_8x16b, 6);
    const __m128i shift_4 = _mm_srli_si128(i2_samp_8x16b, 8);

    /* Neighbouring sample pairs (t[i+k], t[i+k+1]) for i = 0..3 */
    const __m128i pair_01 = _mm_unpacklo_epi16(i2_samp_8x16b, shift_1);
    const __m128i pair_12 = _mm_unpacklo_epi16(shift_1, shift_2);
    const __m128i pair_23 = _mm_unpacklo_epi16(shift_2, shift_3);
    const __m128i pair_34 = _mm_unpacklo_epi16(shift_3, shift_4);

    /* Even and odd output positions, 32-bit precision */
    const __m128i even_4x32b = _mm_add_epi32(_mm_madd_epi16(pair_01, coeff_m1_8),
                                             _mm_madd_epi16(pair_23, coeff_28_m3));
    const __m128i odd_4x32b = _mm_add_epi32(_mm_madd_epi16(pair_12, coeff_m3_28),
                                            _mm_madd_epi16(pair_34, coeff_8_m1));

    /* Interleave even/odd results back into raster order, then round */
    __m128i lo_4x32b = _mm_unpacklo_epi32(even_4x32b, odd_4x32b);
    __m128i hi_4x32b = _mm_unpackhi_epi32(even_4x32b, odd_4x32b);

    lo_4x32b = _mm_srai_epi32(_mm_add_epi32(lo_4x32b, round_512), 10);
    hi_4x32b = _mm_srai_epi32(_mm_add_epi32(hi_4x32b, round_512), 10);

    return _mm_packus_epi32(lo_4x32b, hi_4x32b);
}

/*
 * Dyadic luma interpolation of a reference block (SSE4.2).
 *
 * Stage 1 (vertical): reads 12 rows of the input, DYADIC_REF_W_Y bytes apart,
 * and writes 16 rows of 12 16-bit samples each to pi2_tmp_filt_buf (row
 * stride 12). Columns 0..7 are handled by the first pass and columns 8..11
 * by the second pass. The second pass still loads 8 bytes per row, so 16
 * bytes of every input row are read.
 *
 * Stage 2 (horizontal): converts each intermediate row into 16 output bytes,
 * giving a 16 x 16 byte block in pu1_out_buf with row stride i4_out_stride.
 *
 * Both stages use the 4-tap filters (-1, 8, 28, -3) and (-3, 28, 8, -1);
 * each sums to 32, hence the final (x + 512) >> 10 rounding.
 *
 * pu1_inp_buf       input samples, row stride DYADIC_REF_W_Y
 * pi2_tmp_filt_buf  scratch buffer for the vertical stage (16 * 12 WORD16)
 * pu1_out_buf       destination of the interpolated block
 * i4_out_stride     destination row stride in bytes
 */
void isvc_interpolate_base_luma_dyadic_sse42(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
                                             UWORD8 *pu1_out_buf, WORD32 i4_out_stride)
{
    const WORD32 i4_tmp_strd = 12;
    const WORD32 i4_inp_strd = DYADIC_REF_W_Y;
    const __m128i coeff_m3_8x16b = _mm_set1_epi16(-3);
    const __m128i coeff_28_8x16b = _mm_set1_epi16(28);

    WORD32 i4_pass, i4_win, i4_row;
    WORD16 *pi2_tmp_row;
    UWORD8 *pu1_out_row;

    /* ---------------- Vertical interpolation ---------------- */
    for(i4_pass = 0; i4_pass < 2; i4_pass++)
    {
        const WORD32 i4_is_first_pass = (0 == i4_pass);
        const WORD32 i4_col_offset = i4_pass << 3;
        UWORD8 *pu1_inp_row = pu1_inp_buf + i4_col_offset;
        __m128i row_a, row_b, row_c, row_d;

        pi2_tmp_row = pi2_tmp_filt_buf + i4_col_offset;

        /* Prime the sliding window with input rows 0..2 */
        row_b = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) pu1_inp_row));
        row_c = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp_row + i4_inp_strd)));
        row_d = _mm_cvtepu8_epi16(
            _mm_loadl_epi64((__m128i *) (pu1_inp_row + (i4_inp_strd << 1))));
        pu1_inp_row += (i4_inp_strd << 1) + i4_inp_strd;

        /*
         * Window i4_win covers input rows i4_win .. i4_win + 3. It yields
         * output row 2 * i4_win - 1 (phase 4) and output row 2 * i4_win
         * (phase 12). The first window has no phase-4 row and the last
         * window has no phase-12 row, giving 16 output rows in total.
         */
        for(i4_win = 0; i4_win < 9; i4_win++)
        {
            row_a = row_b;
            row_b = row_c;
            row_c = row_d;
            row_d = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) pu1_inp_row));
            pu1_inp_row += i4_inp_strd;

            if(i4_win > 0)
            {
                /* phase 4: -3*a + 28*b + 8*c - d */
                const __m128i phase4_8x16b =
                    _mm_adds_epi16(_mm_adds_epi16(_mm_mullo_epi16(row_a, coeff_m3_8x16b),
                                                  _mm_mullo_epi16(row_b, coeff_28_8x16b)),
                                   _mm_subs_epi16(_mm_slli_epi16(row_c, 3), row_d));

                isvc_luma_dyadic_store_tmp_row_sse42(pi2_tmp_row, phase4_8x16b,
                                                     i4_is_first_pass);
                pi2_tmp_row += i4_tmp_strd;
            }

            if(i4_win < 8)
            {
                /* phase 12: -a + 8*b + 28*c - 3*d */
                const __m128i phase12_8x16b =
                    _mm_adds_epi16(_mm_adds_epi16(_mm_slli_epi16(row_b, 3),
                                                  _mm_mullo_epi16(row_c, coeff_28_8x16b)),
                                   _mm_subs_epi16(_mm_mullo_epi16(row_d, coeff_m3_8x16b), row_a));

                isvc_luma_dyadic_store_tmp_row_sse42(pi2_tmp_row, phase12_8x16b,
                                                     i4_is_first_pass);
                pi2_tmp_row += i4_tmp_strd;
            }
        }
    }

    /* ---------------- Horizontal interpolation ---------------- */
    pi2_tmp_row = pi2_tmp_filt_buf;
    pu1_out_row = pu1_out_buf;

    for(i4_row = 0; i4_row < 16; i4_row++)
    {
        /* Overlapping loads: samples 0..7 feed outputs 0..7, samples 4..11 feed 8..15 */
        const __m128i tmp_left_8x16b = _mm_loadu_si128((__m128i *) pi2_tmp_row);
        const __m128i tmp_right_8x16b = _mm_loadu_si128((__m128i *) (pi2_tmp_row + 4));

        const __m128i out_left_8x16b = isvc_luma_dyadic_horz_half_row_sse42(tmp_left_8x16b);
        const __m128i out_right_8x16b = isvc_luma_dyadic_horz_half_row_sse42(tmp_right_8x16b);

        _mm_storeu_si128((__m128i *) pu1_out_row,
                         _mm_packus_epi16(out_left_8x16b, out_right_8x16b));

        pi2_tmp_row += i4_tmp_strd;
        pu1_out_row += i4_out_stride;
    }
}
|
||||
|
||||
/**
 * Vertical pass of the dyadic chroma interpolation (SSE4.2 variant).
 *
 * Reads six consecutive input rows, DYADIC_REF_W_C bytes apart (16 bytes are
 * loaded per row, only the low 8 bytes are used), and writes eight rows of
 * WORD16 results at a pitch of 6 elements, i.e. elements 0..47 of
 * pi2_tmp_filt_buf. Nothing at index 48 or beyond is written.
 *
 * Output row 2k     = in[k + 1] * (16 - phase_0) + in[k + 2] * phase_0  (k = 1..3)
 * Output row 2k + 1 = in[k + 1] * (16 - phase_1) + in[k + 2] * phase_1  (k = 0..3)
 * Output row 0      = in[0]     * (16 - phase_0) + in[1]     * phase_0
 * (rows are indexed from 0; see the pair/tap selection in the loop below).
 *
 * @param[in]  pu1_inp_buf       first of the six input rows
 * @param[out] pi2_tmp_filt_buf  receives 8 x 6 unnormalised WORD16 sums
 * @param[in]  i4_phase_0        second tap of the even output rows; the first
 *                               tap is 16 - i4_phase_0
 * @param[in]  i4_phase_1        second tap of the odd output rows; the first
 *                               tap is 16 - i4_phase_1
 *
 * NOTE(review): the taps are narrowed to WORD8, so phases are presumably
 * expected in 0..16 - confirm against the caller.
 */
void isvc_vert_interpol_chroma_dyadic_sse42(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
                                            WORD32 i4_phase_0, WORD32 i4_phase_1)
{
    /* Input pitch in bytes, intermediate pitch in WORD16 elements */
    const WORD32 i4_src_stride = DYADIC_REF_W_C;
    const WORD32 i4_filt_stride = 6;

    const WORD8 i1_tap_0 = (WORD8) (16 - i4_phase_0);
    const WORD8 i1_tap_1 = (WORD8) (i4_phase_0);
    const WORD8 i1_tap_2 = (WORD8) (16 - i4_phase_1);
    const WORD8 i1_tap_3 = (WORD8) (i4_phase_1);

    /* Interleaving two splats gives byte 2k = first tap, byte 2k + 1 = second tap */
    const __m128i taps_c0_c1_16x8b =
        _mm_unpacklo_epi8(_mm_set1_epi8(i1_tap_0), _mm_set1_epi8(i1_tap_1));
    const __m128i taps_c2_c3_16x8b =
        _mm_unpacklo_epi8(_mm_set1_epi8(i1_tap_2), _mm_set1_epi8(i1_tap_3));

    __m128i a_rows_16x8b[6];
    __m128i a_pairs_16x8b[5];
    __m128i row6_8x16b, row7_8x16b, tail_8x16b;
    WORD32 i;

    for(i = 0; i < 6; i++)
    {
        a_rows_16x8b[i] = _mm_loadu_si128((__m128i *) (pu1_inp_buf + i * i4_src_stride));
    }

    /* Pair i holds the low 8 samples of rows i and i + 1, byte-interleaved */
    for(i = 0; i < 5; i++)
    {
        a_pairs_16x8b[i] = _mm_unpacklo_epi8(a_rows_16x8b[i], a_rows_16x8b[i + 1]);
    }

    /*
     * Output rows 0..5. Every store writes 8 lanes although the pitch is 6, so
     * the two spill-over lanes are overwritten by the following row's store;
     * the rows must therefore be written in increasing order.
     */
    for(i = 0; i < 6; i++)
    {
        const __m128i pair_16x8b = a_pairs_16x8b[(i + 1) >> 1];
        const __m128i taps_16x8b = (i & 1) ? taps_c2_c3_16x8b : taps_c0_c1_16x8b;

        _mm_storeu_si128((__m128i *) (pi2_tmp_filt_buf + i * i4_filt_stride),
                         _mm_maddubs_epi16(pair_16x8b, taps_16x8b));
    }

    /* Output row 6: its first four lanes go out directly */
    row6_8x16b = _mm_maddubs_epi16(a_pairs_16x8b[3], taps_c0_c1_16x8b);
    _mm_storel_epi64((__m128i *) (pi2_tmp_filt_buf + 6 * i4_filt_stride), row6_8x16b);

    /* Output row 7 */
    row7_8x16b = _mm_maddubs_epi16(a_pairs_16x8b[4], taps_c2_c3_16x8b);

    /*
     * The last store is assembled so that it ends exactly at element 47:
     * lanes 0..1 = row 6 lanes 4..5, lanes 2..7 = row 7 lanes 0..5.
     */
    row6_8x16b = _mm_shuffle_epi32(row6_8x16b, _MM_SHUFFLE(1, 0, 3, 2)); /* swap 64-bit halves */
    row7_8x16b = _mm_shuffle_epi32(row7_8x16b, _MM_SHUFFLE(2, 1, 0, 3)); /* rotate up by 32 bits */
    tail_8x16b = _mm_blend_epi16(row6_8x16b, row7_8x16b, 0xFC);

    _mm_storeu_si128((__m128i *) (pi2_tmp_filt_buf + 6 * i4_filt_stride + 4), tail_8x16b);
}
|
||||
|
||||
/**
 * Horizontal pass of the dyadic chroma interpolation (SSE4.2 variant).
 *
 * Consumes eight intermediate rows of six WORD16 values each (pitch 6, i.e.
 * elements 0..47 of pi2_tmp_filt_buf) and produces eight samples per row:
 *
 *   out[2k]     = (a[k]     * (16 - phase_0) + a[k + 1] * phase_0 + 128) >> 8
 *   out[2k + 1] = (a[k + 1] * (16 - phase_1) + a[k + 2] * phase_1 + 128) >> 8
 *
 * for k = 0..3, where a[] is one intermediate row. Each result is written to
 * the low byte of a 16-bit lane of the output row; the high byte of every lane
 * keeps the value already present in pu1_out_buf. Sixteen bytes are read and
 * written back per output row. Only the low 8 bits of each result are kept
 * (no saturation is applied).
 *
 * @param[in]     pi2_tmp_filt_buf  8 x 6 intermediate values
 * @param[in,out] pu1_out_buf       first of the eight output rows
 * @param[in]     i4_out_stride     distance between output rows, in bytes
 * @param[in]     i4_phase_0        second tap for even output samples
 * @param[in]     i4_phase_1        second tap for odd output samples
 *
 * NOTE(review): the byte layout suggests an interleaved two-plane chroma
 * buffer in which only one plane is updated per call - confirm with the caller.
 */
void isvc_horz_interpol_chroma_dyadic_sse42(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf,
                                            WORD32 i4_out_stride, WORD32 i4_phase_0,
                                            WORD32 i4_phase_1)
{
    const WORD32 i4_coeff_0 = 16 - i4_phase_0;
    const WORD32 i4_coeff_1 = i4_phase_0;
    const WORD32 i4_coeff_2 = 16 - i4_phase_1;
    const WORD32 i4_coeff_3 = i4_phase_1;

    /* Interleaved tap pairs: lane 2k = first tap, lane 2k + 1 = second tap */
    const __m128i coeff_c0_c1_8x16b = _mm_unpacklo_epi16(_mm_set1_epi16((WORD16) i4_coeff_0),
                                                         _mm_set1_epi16((WORD16) i4_coeff_1));
    const __m128i coeff_c2_c3_8x16b = _mm_unpacklo_epi16(_mm_set1_epi16((WORD16) i4_coeff_2),
                                                         _mm_set1_epi16((WORD16) i4_coeff_3));
    const __m128i res_128 = _mm_set1_epi32(128);
    /* Selects the byte of each 16-bit lane that this function overwrites */
    const __m128i low_byte_mask = _mm_set1_epi16(0x00FF);

    __m128i a_res_8x16b[8];
    __m128i a_out_16x8b[8];
    WORD32 i4_row;

    /* Horizontal interpolation, one intermediate row per iteration */
    for(i4_row = 0; i4_row < 8; i4_row++)
    {
        __m128i samp_0, samp_1, samp_2;
        __m128i res_even_4x32b, res_odd_4x32b;

        if(i4_row < 7)
        {
            samp_0 = _mm_loadu_si128((__m128i *) (pi2_tmp_filt_buf + i4_row * 6));
        }
        else
        {
            /*
             * A plain 8-lane load at element 42 would touch elements 48 and 49,
             * which lie past the 48 values that make up the intermediate block.
             * Load the last 8 valid lanes (40..47) instead and drop the first
             * two; lanes 6 and 7 are never used by the filter below.
             */
            samp_0 = _mm_srli_si128(_mm_loadu_si128((__m128i *) (pi2_tmp_filt_buf + 40)), 4);
        }

        samp_1 = _mm_srli_si128(samp_0, 2); /* a1 a2 a3 a4 a5 ... */
        samp_2 = _mm_srli_si128(samp_0, 4); /* a2 a3 a4 a5 ...    */

        /* a0c0+a1c1  a1c0+a2c1  a2c0+a3c1  a3c0+a4c1 */
        res_even_4x32b = _mm_madd_epi16(_mm_unpacklo_epi16(samp_0, samp_1), coeff_c0_c1_8x16b);
        /* a1c2+a2c3  a2c2+a3c3  a3c2+a4c3  a4c2+a5c3 */
        res_odd_4x32b = _mm_madd_epi16(_mm_unpacklo_epi16(samp_1, samp_2), coeff_c2_c3_8x16b);

        /* Round and normalise by 256 */
        res_even_4x32b = _mm_srai_epi32(_mm_add_epi32(res_even_4x32b, res_128), 8);
        res_odd_4x32b = _mm_srai_epi32(_mm_add_epi32(res_odd_4x32b, res_128), 8);

        /* Narrow to 16 bits and interleave: even0 odd0 even1 odd1 ... */
        res_even_4x32b = _mm_packs_epi32(res_even_4x32b, res_even_4x32b);
        res_odd_4x32b = _mm_packs_epi32(res_odd_4x32b, res_odd_4x32b);

        a_res_8x16b[i4_row] =
            _mm_and_si128(_mm_unpacklo_epi16(res_even_4x32b, res_odd_4x32b), low_byte_mask);
    }

    /* Read every output row first, then merge: high bytes kept, low bytes replaced */
    for(i4_row = 0; i4_row < 8; i4_row++)
    {
        const __m128i out_16x8b =
            _mm_loadu_si128((__m128i *) (pu1_out_buf + i4_row * i4_out_stride));

        a_out_16x8b[i4_row] =
            _mm_or_si128(_mm_andnot_si128(low_byte_mask, out_16x8b), a_res_8x16b[i4_row]);
    }

    for(i4_row = 0; i4_row < 8; i4_row++)
    {
        _mm_storeu_si128((__m128i *) (pu1_out_buf + i4_row * i4_out_stride),
                         a_out_16x8b[i4_row]);
    }
}
|
||||
548
common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c
Normal file
548
common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c
Normal file
|
|
@ -0,0 +1,548 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_iquant_itrans_recon_dc_ssse3.c
|
||||
*
|
||||
* @brief
|
||||
* Contains function definitions for inverse quantization, inverse
|
||||
* transform and reconstruction
|
||||
*
|
||||
* @author
|
||||
* Mohit [100664]
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvc_iquant_itrans_recon_4x4_dc_ssse3()
|
||||
* - isvc_iquant_itrans_recon_8x8_dc_ssse3()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "ih264_defs.h"
|
||||
#include "ih264_trans_macros.h"
|
||||
#include "ih264_macros.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "ih264_trans_data.h"
|
||||
#include "ih264_size_defs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
|
||||
/*
|
||||
********************************************************************************
|
||||
*
|
||||
* @brief This function reconstructs a 4x4 sub block from quantized residue and
|
||||
* prediction buffer for dc input pattern only, i.e. only the (0,0) element of
|
||||
*the input 4x4 block is non-zero. For complete function, refer
|
||||
*isvc_iquant_itrans_recon_ssse3.c
|
||||
*
|
||||
* @par Description:
|
||||
* The quantized residue is first inverse quantized, then inverse transformed.
|
||||
* This inverse transformed content is added to the prediction buffer to recon-
|
||||
* struct the end output
|
||||
*
|
||||
* @param[in] pi2_src
|
||||
* quantized 4x4 block
|
||||
*
|
||||
* @param[in] pu1_pred
|
||||
* prediction 4x4 block
|
||||
*
|
||||
* @param[out] pu1_out
|
||||
* reconstructed 4x4 block
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* quantization buffer stride
|
||||
*
|
||||
* @param[in] i4_pred_stride,
|
||||
* Prediction buffer stride
|
||||
*
|
||||
* @param[in] i4_out_stride
|
||||
* recon buffer Stride
|
||||
*
|
||||
* @param[in] pu2_scaling_list
|
||||
* pointer to scaling list
|
||||
*
|
||||
* @param[in] pu2_norm_adjust
|
||||
* pointer to inverse scale matrix
|
||||
*
|
||||
* @param[in] u4_qp_div_6
|
||||
* Floor (qp/6)
|
||||
*
|
||||
* @param[in] pi4_tmp
|
||||
* temporary buffer of size 1*16
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvc_iquant_itrans_recon_4x4_dc_ssse3(buffer_container_t *ps_src, buffer_container_t *ps_pred,
|
||||
buffer_container_t *ps_res_pred,
|
||||
buffer_container_t *ps_res, buffer_container_t *ps_rec,
|
||||
iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
|
||||
WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
|
||||
WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
|
||||
{
|
||||
WORD16 *pi2_src = ps_src->pv_data;
|
||||
WORD16 *pi2_res = ps_res->pv_data;
|
||||
WORD16 *pi2_res_pred = ps_res_pred->pv_data;
|
||||
UWORD8 *pu1_pred = ps_pred->pv_data;
|
||||
UWORD8 *pu1_out = ps_rec->pv_data;
|
||||
WORD32 i4_src_stride = ps_src->i4_data_stride;
|
||||
WORD32 i4_res_stride = ps_res->i4_data_stride;
|
||||
WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
|
||||
WORD32 i4_pred_stride = ps_pred->i4_data_stride;
|
||||
WORD32 i4_out_stride = ps_rec->i4_data_stride;
|
||||
const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
|
||||
const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
|
||||
UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
|
||||
UWORD32 *pu4_out = (UWORD32 *) pu1_out;
|
||||
WORD32 q0 = pi2_src[0];
|
||||
WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
|
||||
|
||||
__m128i predload_r, pred_r0, pred_r1, pred_r2, pred_r3;
|
||||
__m128i sign_reg;
|
||||
__m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
|
||||
__m128i temp4, temp5, temp6, temp7;
|
||||
__m128i value_add;
|
||||
|
||||
UNUSED(pi2_tmp);
|
||||
UNUSED(u1_res_accumulate);
|
||||
UNUSED(i4_src_stride);
|
||||
UNUSED(i4_res_stride);
|
||||
UNUSED(i4_res_pred_stride);
|
||||
UNUSED(pi2_res);
|
||||
UNUSED(pi2_res_pred);
|
||||
UNUSED(i4_iq_start_idx);
|
||||
|
||||
/* Implement residue accumulation */
|
||||
ASSERT(0);
|
||||
|
||||
INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
|
||||
|
||||
if(i4_iq_start_idx != 0) q0 = pi2_dc_src[0]; // Restoring dc value for intra case
|
||||
|
||||
i_macro = ((q0 + 32) >> 6);
|
||||
|
||||
value_add = _mm_set1_epi16(i_macro);
|
||||
|
||||
zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
|
||||
// Load pred buffer
|
||||
predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); // p00 p01 p02 p03 0 0 0 0 0
|
||||
// 0 0 0 -- all 8 bits
|
||||
pred_r0 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p00 p01 p02 p03 0 0 0 0 -- all 16 bits
|
||||
predload_r =
|
||||
_mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); // p10 p11 p12 p13 0 0 0 0 0 0
|
||||
// 0 0 -- all 8 bits
|
||||
pred_r1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p10 p11 p12 p13 0 0 0 0 -- all 16 bits
|
||||
predload_r =
|
||||
_mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); // p20 p21 p22 p23 0 0 0 0
|
||||
// 0 0 0 0 -- all 8 bits
|
||||
pred_r2 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p20 p21 p22 p23 0 0 0 0 -- all 16 bits
|
||||
predload_r =
|
||||
_mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); // p30 p31 p32 p33 0 0 0 0
|
||||
// 0 0 0 0 -- all 8 bits
|
||||
pred_r3 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p30 p31 p32 p33 0 0 0 0 -- all 16 bits
|
||||
|
||||
pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); // p00 p01 p02 p03 p10 p11 p12 p13
|
||||
pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); // p20 p21 p22p p23 p30 p31 p32 p33
|
||||
|
||||
temp4 = _mm_add_epi16(value_add, pred_r0);
|
||||
temp5 = _mm_add_epi16(value_add, pred_r2);
|
||||
/*------------------------------------------------------------------*/
|
||||
// Clipping the results to 8 bits
|
||||
sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); // sign check
|
||||
temp4 = _mm_and_si128(temp4, sign_reg);
|
||||
sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); // sign check
|
||||
temp5 = _mm_and_si128(temp5, sign_reg);
|
||||
|
||||
temp4 = _mm_packus_epi16(temp4, temp5);
|
||||
temp5 = _mm_srli_si128(temp4, 4);
|
||||
temp6 = _mm_srli_si128(temp5, 4);
|
||||
temp7 = _mm_srli_si128(temp6, 4);
|
||||
|
||||
*pu4_out = _mm_cvtsi128_si32(temp4);
|
||||
pu1_out += i4_out_stride;
|
||||
pu4_out = (UWORD32 *) (pu1_out);
|
||||
*(pu4_out) = _mm_cvtsi128_si32(temp5);
|
||||
pu1_out += i4_out_stride;
|
||||
pu4_out = (UWORD32 *) (pu1_out);
|
||||
*(pu4_out) = _mm_cvtsi128_si32(temp6);
|
||||
pu1_out += i4_out_stride;
|
||||
pu4_out = (UWORD32 *) (pu1_out);
|
||||
*(pu4_out) = _mm_cvtsi128_si32(temp7);
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function performs inverse quant and Inverse transform type Ci8 for 8x8
|
||||
*block for dc input pattern only, i.e. only the (0,0) element of the input 8x8
|
||||
*block is non-zero. For complete function, refer
|
||||
*isvc_iquant_itrans_recon_ssse3.c
|
||||
*
|
||||
* @par Description:
|
||||
* Performs inverse transform Ci8 and adds the residue to get the
|
||||
* reconstructed block
|
||||
*
|
||||
* @param[in] pi2_src
|
||||
* Input 8x8 coefficients
|
||||
*
|
||||
* @param[in] pu1_pred
|
||||
* Prediction 8x8 block
|
||||
*
|
||||
* @param[out] pu1_recon
|
||||
* Output 8x8 block
|
||||
*
|
||||
* @param[in] q_div
|
||||
* QP/6
|
||||
*
|
||||
* @param[in] q_rem
|
||||
* QP%6
|
||||
*
|
||||
* @param[in] q_lev
|
||||
* Quantizer level
|
||||
*
|
||||
* @param[in] u4_src_stride
|
||||
* Input stride
|
||||
*
|
||||
* @param[in] u4_pred_stride,
|
||||
* Prediction stride
|
||||
*
|
||||
* @param[in] u4_out_stride
|
||||
* Output Stride
|
||||
*
|
||||
* @param[in] pi4_tmp
|
||||
* temporary buffer of size 1*64
|
||||
* the tmp for each block
|
||||
*
|
||||
* @param[in] pu4_iquant_mat
|
||||
* Pointer to the inverse quantization matrix
|
||||
*
|
||||
* @returns Void
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvc_iquant_itrans_recon_8x8_dc_ssse3(buffer_container_t *ps_src, buffer_container_t *ps_pred,
|
||||
buffer_container_t *ps_res_pred,
|
||||
buffer_container_t *ps_res, buffer_container_t *ps_rec,
|
||||
iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
|
||||
WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
|
||||
WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
|
||||
{
|
||||
WORD16 *pi2_src = ps_src->pv_data;
|
||||
WORD16 *pi2_res = ps_res->pv_data;
|
||||
WORD16 *pi2_res_pred = ps_res_pred->pv_data;
|
||||
UWORD8 *pu1_pred = ps_pred->pv_data;
|
||||
UWORD8 *pu1_out = ps_rec->pv_data;
|
||||
WORD32 i4_src_stride = ps_src->i4_data_stride;
|
||||
WORD32 i4_res_stride = ps_res->i4_data_stride;
|
||||
WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
|
||||
WORD32 i4_pred_stride = ps_pred->i4_data_stride;
|
||||
WORD32 i4_out_stride = ps_rec->i4_data_stride;
|
||||
const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
|
||||
const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
|
||||
UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
|
||||
WORD32 q0 = pi2_src[0];
|
||||
WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 6) ? 1 << (5 - u4_qp_div_6) : 0;
|
||||
|
||||
__m128i predload_r, pred_r0, pred_r1, pred_r2, pred_r3, pred_r4, pred_r5, pred_r6, pred_r7;
|
||||
__m128i sign_reg;
|
||||
__m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
|
||||
__m128i temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
__m128i value_add;
|
||||
|
||||
UNUSED(pi2_tmp);
|
||||
UNUSED(pi2_dc_src);
|
||||
UNUSED(u1_res_accumulate);
|
||||
UNUSED(i4_src_stride);
|
||||
UNUSED(i4_res_stride);
|
||||
UNUSED(i4_res_pred_stride);
|
||||
UNUSED(pi2_res);
|
||||
UNUSED(pi2_res_pred);
|
||||
UNUSED(i4_iq_start_idx);
|
||||
|
||||
/* Implement residue accumulation */
|
||||
ASSERT(0);
|
||||
|
||||
INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 6);
|
||||
i_macro = ((q0 + 32) >> 6);
|
||||
|
||||
value_add = _mm_set1_epi16(i_macro);
|
||||
|
||||
// Load pred buffer row 0
|
||||
predload_r =
|
||||
_mm_loadl_epi64((__m128i *) (&pu1_pred[0])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0
|
||||
// -- all 8 bits
|
||||
pred_r0 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
|
||||
// Load pred buffer row 1
|
||||
predload_r =
|
||||
_mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0
|
||||
// 0 0 0 0 0 0 -- all 8 bits
|
||||
pred_r1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
|
||||
// Load pred buffer row 2
|
||||
predload_r = _mm_loadl_epi64(
|
||||
(__m128i *) (&pu1_pred[2 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0
|
||||
// 0 0 0 0 0 0 -- all 8 bits
|
||||
pred_r2 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
|
||||
// Load pred buffer row 3
|
||||
predload_r = _mm_loadl_epi64(
|
||||
(__m128i *) (&pu1_pred[3 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0
|
||||
// 0 0 0 0 0 0 -- all 8 bits
|
||||
pred_r3 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
|
||||
// Load pred buffer row 4
|
||||
predload_r = _mm_loadl_epi64(
|
||||
(__m128i *) (&pu1_pred[4 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0
|
||||
// 0 0 0 0 0 0 -- all 8 bits
|
||||
pred_r4 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
|
||||
// Load pred buffer row 5
|
||||
predload_r =
|
||||
_mm_loadl_epi64((__m128i *) (&pu1_pred[5 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0
|
||||
// 0 0 0 0 0 0 0 -- all 8 bit
|
||||
pred_r5 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
|
||||
// Load pred buffer row 6
|
||||
predload_r = _mm_loadl_epi64(
|
||||
(__m128i *) (&pu1_pred[6 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0
|
||||
// 0 0 0 0 0 0 -- all 8 bits
|
||||
pred_r6 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
|
||||
// Load pred buffer row 7
|
||||
predload_r = _mm_loadl_epi64(
|
||||
(__m128i *) (&pu1_pred[7 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0
|
||||
// 0 0 0 0 0 0 -- all 8 bits
|
||||
pred_r7 = _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
|
||||
|
||||
temp1 = _mm_add_epi16(value_add, pred_r0);
|
||||
|
||||
temp2 = _mm_add_epi16(value_add, pred_r1);
|
||||
|
||||
temp3 = _mm_add_epi16(value_add, pred_r2);
|
||||
|
||||
temp4 = _mm_add_epi16(value_add, pred_r3);
|
||||
|
||||
temp5 = _mm_add_epi16(value_add, pred_r4);
|
||||
|
||||
temp6 = _mm_add_epi16(value_add, pred_r5);
|
||||
|
||||
temp7 = _mm_add_epi16(value_add, pred_r6);
|
||||
|
||||
temp8 = _mm_add_epi16(value_add, pred_r7);
|
||||
/*------------------------------------------------------------------*/
|
||||
// Clipping the results to 8 bits
|
||||
sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); // sign check
|
||||
temp1 = _mm_and_si128(temp1, sign_reg);
|
||||
sign_reg = _mm_cmpgt_epi16(temp2, zero_8x16b); // sign check
|
||||
temp2 = _mm_and_si128(temp2, sign_reg);
|
||||
sign_reg = _mm_cmpgt_epi16(temp3, zero_8x16b); // sign check
|
||||
temp3 = _mm_and_si128(temp3, sign_reg);
|
||||
sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); // sign check
|
||||
temp4 = _mm_and_si128(temp4, sign_reg);
|
||||
sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); // sign check
|
||||
temp5 = _mm_and_si128(temp5, sign_reg);
|
||||
sign_reg = _mm_cmpgt_epi16(temp6, zero_8x16b); // sign check
|
||||
temp6 = _mm_and_si128(temp6, sign_reg);
|
||||
sign_reg = _mm_cmpgt_epi16(temp7, zero_8x16b); // sign check
|
||||
temp7 = _mm_and_si128(temp7, sign_reg);
|
||||
sign_reg = _mm_cmpgt_epi16(temp8, zero_8x16b); // sign check
|
||||
temp8 = _mm_and_si128(temp8, sign_reg);
|
||||
|
||||
temp1 = _mm_packus_epi16(temp1, zero_8x16b);
|
||||
temp2 = _mm_packus_epi16(temp2, zero_8x16b);
|
||||
temp3 = _mm_packus_epi16(temp3, zero_8x16b);
|
||||
temp4 = _mm_packus_epi16(temp4, zero_8x16b);
|
||||
temp5 = _mm_packus_epi16(temp5, zero_8x16b);
|
||||
temp6 = _mm_packus_epi16(temp6, zero_8x16b);
|
||||
temp7 = _mm_packus_epi16(temp7, zero_8x16b);
|
||||
temp8 = _mm_packus_epi16(temp8, zero_8x16b);
|
||||
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[0]), temp1);
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), temp2);
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), temp3);
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), temp4);
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[4 * i4_out_stride]), temp5);
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[5 * i4_out_stride]), temp6);
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[6 * i4_out_stride]), temp7);
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[7 * i4_out_stride]), temp8);
|
||||
}
|
||||
|
||||
/*
|
||||
********************************************************************************
|
||||
*
|
||||
* @brief This function reconstructs a 4x4 sub block from quantized chroma
|
||||
*resiude and prediction buffer
|
||||
*
|
||||
* @par Description:
|
||||
* The quantized residue is first inverse quantized, then inverse transformed.
|
||||
* This inverse transformed content is added to the prediction buffer to recon-
|
||||
* struct the end output
|
||||
*
|
||||
* @param[in] pi2_src
|
||||
* quantized 4x4 block
|
||||
*
|
||||
* @param[in] pu1_pred
|
||||
* prediction 4x4 block
|
||||
*
|
||||
* @param[out] pu1_out
|
||||
* reconstructed 4x4 block
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* quantization buffer stride
|
||||
*
|
||||
* @param[in] i4_pred_stride,
|
||||
* Prediction buffer stride
|
||||
*
|
||||
* @param[in] i4_out_stride
|
||||
* recon buffer Stride
|
||||
*
|
||||
* @param[in] pu2_scaling_list
|
||||
* pointer to scaling list
|
||||
*
|
||||
* @param[in] pu2_norm_adjust
|
||||
* pointer to inverse scale matrix
|
||||
*
|
||||
* @param[in] u4_qp_div_6
|
||||
* Floor (qp/6)
|
||||
*
|
||||
* @param[in] pi4_tmp
|
||||
* temporary buffer of size 1*16
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3(
|
||||
buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
|
||||
buffer_container_t *ps_res, buffer_container_t *ps_rec,
|
||||
iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
|
||||
WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
|
||||
{
|
||||
WORD16 *pi2_src = ps_src->pv_data;
|
||||
WORD16 *pi2_res = ps_res->pv_data;
|
||||
WORD16 *pi2_res_pred = ps_res_pred->pv_data;
|
||||
UWORD8 *pu1_pred = ps_pred->pv_data;
|
||||
UWORD8 *pu1_out = ps_rec->pv_data;
|
||||
WORD32 i4_src_stride = ps_src->i4_data_stride;
|
||||
WORD32 i4_res_stride = ps_res->i4_data_stride;
|
||||
WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
|
||||
WORD32 i4_pred_stride = ps_pred->i4_data_stride;
|
||||
WORD32 i4_out_stride = ps_rec->i4_data_stride;
|
||||
const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
|
||||
const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
|
||||
UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
|
||||
WORD16 q0 = pi2_dc_src[0]; // DC value won't be dequantized for chroma
|
||||
// inverse transform
|
||||
WORD16 i_macro = ((q0 + 32) >> 6);
|
||||
|
||||
__m128i pred_r0, pred_r1, pred_r2, pred_r3, sign_reg;
|
||||
__m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
|
||||
__m128i chroma_mask = _mm_set1_epi16(0xFF);
|
||||
__m128i value_add = _mm_set1_epi16(i_macro);
|
||||
__m128i out_r0, out_r1, out_r2, out_r3;
|
||||
|
||||
UNUSED(pi2_src);
|
||||
UNUSED(pu2_iscal_mat);
|
||||
UNUSED(pu2_weigh_mat);
|
||||
UNUSED(u4_qp_div_6);
|
||||
UNUSED(pi2_tmp);
|
||||
UNUSED(u1_res_accumulate);
|
||||
UNUSED(i4_src_stride);
|
||||
UNUSED(i4_res_stride);
|
||||
UNUSED(i4_res_pred_stride);
|
||||
UNUSED(pi2_res);
|
||||
UNUSED(pi2_res_pred);
|
||||
UNUSED(i4_iq_start_idx);
|
||||
|
||||
/* Implement residue accumulation */
|
||||
ASSERT(0);
|
||||
|
||||
// Load pred buffer
|
||||
pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); // p00 p01 p02 p03 0 0 0 0 0
|
||||
// 0 0 0 -- all 8 bits
|
||||
pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); // p10 p11 p12 p13 0 0 0 0
|
||||
// 0 0 0 0 -- all 8 bits
|
||||
pred_r2 =
|
||||
_mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); // p20 p21 p22 p23 0 0 0 0
|
||||
// 0 0 0 0 -- all 8 bits
|
||||
pred_r3 =
|
||||
_mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); // p30 p31 p32 p33 0 0 0 0
|
||||
// 0 0 0 0 -- all 8 bits
|
||||
|
||||
pred_r0 = _mm_and_si128(pred_r0, chroma_mask);
|
||||
pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
|
||||
pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
|
||||
pred_r3 = _mm_and_si128(pred_r3, chroma_mask);
|
||||
|
||||
pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); // p00 p01 p02 p03 p10 p11 p12 p13
|
||||
pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); // p20 p21 p22p p23 p30 p31 p32 p33
|
||||
|
||||
pred_r0 = _mm_add_epi16(value_add, pred_r0);
|
||||
pred_r2 = _mm_add_epi16(value_add, pred_r2);
|
||||
|
||||
/*------------------------------------------------------------------*/
|
||||
// Clipping the results to 8 bits
|
||||
sign_reg = _mm_cmpgt_epi16(pred_r0, zero_8x16b); // sign check
|
||||
pred_r0 = _mm_and_si128(pred_r0, sign_reg);
|
||||
sign_reg = _mm_cmpgt_epi16(pred_r2, zero_8x16b);
|
||||
pred_r2 = _mm_and_si128(pred_r2, sign_reg);
|
||||
|
||||
pred_r0 = _mm_packus_epi16(pred_r0, pred_r2);
|
||||
pred_r1 = _mm_srli_si128(pred_r0, 4);
|
||||
pred_r2 = _mm_srli_si128(pred_r1, 4);
|
||||
pred_r3 = _mm_srli_si128(pred_r2, 4);
|
||||
|
||||
pred_r0 = _mm_unpacklo_epi8(pred_r0, zero_8x16b); // p00 p01 p02 p03 -- all 16 bits
|
||||
pred_r1 = _mm_unpacklo_epi8(pred_r1, zero_8x16b); // p10 p11 p12 p13 -- all 16 bits
|
||||
pred_r2 = _mm_unpacklo_epi8(pred_r2, zero_8x16b); // p20 p21 p22 p23 -- all 16 bits
|
||||
pred_r3 = _mm_unpacklo_epi8(pred_r3, zero_8x16b); // p30 p31 p32 p33 -- all 16 bits
|
||||
|
||||
chroma_mask = _mm_set1_epi16(0xFF00);
|
||||
out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0]));
|
||||
out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_out_stride]));
|
||||
out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]));
|
||||
out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]));
|
||||
|
||||
out_r0 = _mm_and_si128(out_r0, chroma_mask);
|
||||
out_r1 = _mm_and_si128(out_r1, chroma_mask);
|
||||
out_r2 = _mm_and_si128(out_r2, chroma_mask);
|
||||
out_r3 = _mm_and_si128(out_r3, chroma_mask);
|
||||
|
||||
out_r0 = _mm_add_epi8(out_r0, pred_r0);
|
||||
out_r1 = _mm_add_epi8(out_r1, pred_r1);
|
||||
out_r2 = _mm_add_epi8(out_r2, pred_r2);
|
||||
out_r3 = _mm_add_epi8(out_r3, pred_r3);
|
||||
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[0]), out_r0);
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), out_r1);
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2);
|
||||
_mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3);
|
||||
}
|
||||
2849
common/x86/svc/isvc_iquant_itrans_recon_sse42.c
Normal file
2849
common/x86/svc/isvc_iquant_itrans_recon_sse42.c
Normal file
File diff suppressed because it is too large
Load diff
1291
common/x86/svc/isvc_iquant_itrans_recon_ssse3.c
Normal file
1291
common/x86/svc/isvc_iquant_itrans_recon_ssse3.c
Normal file
File diff suppressed because it is too large
Load diff
157
common/x86/svc/isvc_mem_fns_sse42.c
Normal file
157
common/x86/svc/isvc_mem_fns_sse42.c
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
*******************************************************************************
* @file
*  isvc_mem_fns_sse42.c
*
* @brief
*  SSE4.2 variants of functions used for memory operations
*
*******************************************************************************
*/
|
||||
#include <string.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
|
||||
/**
*******************************************************************************
*
* @brief
*  Fills a 2D block with a constant byte value
*
* @par Description:
*  Writes u1_val to every byte of an i4_blk_wd x i4_blk_ht block. 4x4 blocks,
*  8x8 blocks and blocks whose width and height are both multiples of 16 are
*  filled with SSE stores. Every other size falls back to one memset() per
*  row.
*
* @param[out] pu1_dst
*  pointer to the top-left byte of the block
*
* @param[in] i4_dst_stride
*  distance in bytes between the starts of two consecutive rows
*
* @param[in] u1_val
*  value written to every byte of the block
*
* @param[in] i4_blk_wd
*  block width in bytes
*
* @param[in] i4_blk_ht
*  block height in rows
*
* @returns none
*
* @remarks
*  Nothing is written when either dimension is zero or negative. Only
*  unaligned stores are used, so pu1_dst and i4_dst_stride need no particular
*  alignment.
*
*******************************************************************************
*/
void isvc_memset_2d_sse42(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
                          WORD32 i4_blk_ht)
{
    WORD32 i, j;
    /* u1_val replicated into all 16 bytes; built once and reused by every store */
    const __m128i val_16x8b = _mm_set1_epi8(u1_val);

    /* Nothing to fill. This also keeps a negative width from reaching */
    /* memset(), where it would be converted to a huge size_t */
    if((i4_blk_wd <= 0) || (i4_blk_ht <= 0))
    {
        return;
    }

    if((i4_blk_wd == 4) && (i4_blk_ht == 4))
    {
        const WORD32 i4_val_4x8b = _mm_cvtsi128_si32(val_16x8b);

        for(i = 0; i < 4; i++)
        {
            /* memcpy() instead of a store through (WORD32 *): a row may start */
            /* at any byte address, so the cast pointer could be misaligned */
            memcpy(pu1_dst + i * i4_dst_stride, &i4_val_4x8b, sizeof(i4_val_4x8b));
        }
    }
    else if((i4_blk_wd == 8) && (i4_blk_ht == 8))
    {
        for(i = 0; i < 8; i++)
        {
            _mm_storel_epi64((__m128i *) (pu1_dst + i * i4_dst_stride), val_16x8b);
        }
    }
    else if((i4_blk_wd % 16 == 0) && (i4_blk_ht % 16 == 0))
    {
        for(i = 0; i < i4_blk_ht; i++)
        {
            UWORD8 *pu1_dst_row = pu1_dst + i * i4_dst_stride;

            for(j = 0; j < i4_blk_wd; j += 16)
            {
                _mm_storeu_si128((__m128i *) (pu1_dst_row + j), val_16x8b);
            }
        }
    }
    else
    {
        for(i = 0; i < i4_blk_ht; i++)
        {
            memset(pu1_dst + i * i4_dst_stride, u1_val, i4_blk_wd);
        }
    }
}
|
||||
435
common/x86/svc/isvc_mem_fns_ssse3.c
Normal file
435
common/x86/svc/isvc_mem_fns_ssse3.c
Normal file
|
|
@ -0,0 +1,435 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_mem_fns_ssse3.c
|
||||
*
|
||||
* @brief
|
||||
* Functions used for memory operations
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
/**
|
||||
********************************************************************************
|
||||
* @brief copies a 2d blk from one location to another
|
||||
*
|
||||
* @param[out] pu1_dst : dst pointer
|
||||
*
|
||||
* @param[in] i4_dst_stride: stride of destination
|
||||
*
|
||||
* @param[in] pu1_src : src ptr
|
||||
*
|
||||
* @param[in] i4_src_stride: stride of src
|
||||
*
|
||||
* @param[in] i4_blk_wd : blk width
|
||||
*
|
||||
* @param[in] i4_blk_ht : blk height
|
||||
*
|
||||
* @return void
|
||||
********************************************************************************
|
||||
*/
|
||||
void isvc_copy_2d_ssse3(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src,
|
||||
WORD32 i4_src_stride, WORD32 i4_blk_wd, WORD32 i4_blk_ht)
|
||||
{
|
||||
WORD32 i, j;
|
||||
/* all 128 bit registers are named with a suffix mxnb, where m is the */
|
||||
/* number of n bits packed in the register */
|
||||
|
||||
if(0 == (i4_blk_wd & 31)) /* wd multiple of 32 case */
|
||||
{
|
||||
__m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b;
|
||||
__m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b;
|
||||
|
||||
if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */
|
||||
{
|
||||
__m128i src8_16x8b, src9_16x8b, src10_16x8b, src11_16x8b;
|
||||
__m128i src12_16x8b, src13_16x8b, src14_16x8b, src15_16x8b;
|
||||
|
||||
for(i = 0; i < i4_blk_ht; i += 8)
|
||||
{
|
||||
for(j = 0; j < i4_blk_wd; j += 32)
|
||||
{
|
||||
src0_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src)); // i = 0
|
||||
src1_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride)); // i = 1
|
||||
src2_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2
|
||||
src3_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3
|
||||
src4_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 4 * i4_src_stride)); // i = 4
|
||||
src5_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 5 * i4_src_stride)); // i = 5
|
||||
src6_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 6 * i4_src_stride)); // i = 6
|
||||
src7_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 7 * i4_src_stride)); // i = 7
|
||||
/* Add 16 as offset */
|
||||
src8_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 16)); // i = 0
|
||||
src9_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride + 16)); // i = 1
|
||||
src10_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride + 16)); // i = 2
|
||||
src11_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride + 16)); // i = 3
|
||||
src12_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 4 * i4_src_stride + 16)); // i = 4
|
||||
src13_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 5 * i4_src_stride + 16)); // i = 5
|
||||
src14_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 6 * i4_src_stride + 16)); // i = 6
|
||||
src15_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 7 * i4_src_stride + 16)); // i = 7
|
||||
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst), src0_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride), src1_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 4 * i4_dst_stride), src4_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 5 * i4_dst_stride), src5_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 6 * i4_dst_stride), src6_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 7 * i4_dst_stride), src7_16x8b);
|
||||
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 16), src8_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride + 16), src9_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride + 16), src10_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride + 16), src11_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 4 * i4_dst_stride + 16), src12_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 5 * i4_dst_stride + 16), src13_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 6 * i4_dst_stride + 16), src14_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 7 * i4_dst_stride + 16), src15_16x8b);
|
||||
|
||||
pu1_src += 32;
|
||||
pu1_dst += 32;
|
||||
}
|
||||
|
||||
pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride;
|
||||
pu1_dst = pu1_dst - i4_blk_wd + 8 * i4_dst_stride;
|
||||
}
|
||||
}
|
||||
else /* ht multiple of 4 case */
|
||||
{
|
||||
for(i = 0; i < i4_blk_ht; i += 4)
|
||||
{
|
||||
for(j = 0; j < i4_blk_wd; j += 32)
|
||||
{
|
||||
src0_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src)); // i = 0
|
||||
src1_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride)); // i = 1
|
||||
src2_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2
|
||||
src3_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3
|
||||
/* Add 16 as offset */
|
||||
src4_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 16)); // i = 0
|
||||
src5_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride + 16)); // i = 1
|
||||
src6_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride + 16)); // i = 2
|
||||
src7_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride + 16)); // i = 3
|
||||
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst), src0_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride), src1_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 16), src4_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride + 16), src5_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride + 16), src6_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride + 16), src7_16x8b);
|
||||
|
||||
pu1_src += 32;
|
||||
pu1_dst += 32;
|
||||
}
|
||||
|
||||
pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride;
|
||||
pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(0 == (i4_blk_wd & 15)) /* wd multiple of 16 case */
|
||||
{
|
||||
__m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b;
|
||||
|
||||
if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */
|
||||
{
|
||||
__m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b;
|
||||
|
||||
for(i = 0; i < i4_blk_ht; i += 8)
|
||||
{
|
||||
for(j = 0; j < i4_blk_wd; j += 16)
|
||||
{
|
||||
src0_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0
|
||||
src1_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1
|
||||
src2_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2
|
||||
src3_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3
|
||||
src4_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 4 * i4_src_stride)); // i = 4
|
||||
src5_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 5 * i4_src_stride)); // i = 5
|
||||
src6_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 6 * i4_src_stride)); // i = 6
|
||||
src7_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 7 * i4_src_stride)); // i = 7
|
||||
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 4 * i4_dst_stride), src4_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 5 * i4_dst_stride), src5_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 6 * i4_dst_stride), src6_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 7 * i4_dst_stride), src7_16x8b);
|
||||
|
||||
pu1_src += 16;
|
||||
pu1_dst += 16;
|
||||
}
|
||||
|
||||
pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride;
|
||||
pu1_dst = pu1_dst - i4_blk_wd + 8 * i4_dst_stride;
|
||||
}
|
||||
}
|
||||
else /* ht multiple of 4 case */
|
||||
{
|
||||
for(i = 0; i < i4_blk_ht; i += 4)
|
||||
{
|
||||
for(j = 0; j < i4_blk_wd; j += 16)
|
||||
{
|
||||
src0_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0
|
||||
src1_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1
|
||||
src2_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2
|
||||
src3_16x8b =
|
||||
_mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3
|
||||
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
|
||||
_mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);
|
||||
|
||||
pu1_src += 16;
|
||||
pu1_dst += 16;
|
||||
}
|
||||
|
||||
pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride;
|
||||
pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(0 == (i4_blk_wd & 7)) /* wd multiple of 8 case */
|
||||
{
|
||||
__m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b;
|
||||
|
||||
if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */
|
||||
{
|
||||
__m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b;
|
||||
|
||||
for(i = 0; i < i4_blk_ht; i += 8)
|
||||
{
|
||||
for(j = 0; j < i4_blk_wd; j += 8)
|
||||
{
|
||||
src0_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0
|
||||
src1_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1
|
||||
src2_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2
|
||||
src3_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3
|
||||
src4_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 4 * i4_src_stride)); // i = 4
|
||||
src5_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 5 * i4_src_stride)); // i = 5
|
||||
src6_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 6 * i4_src_stride)); // i = 6
|
||||
src7_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 7 * i4_src_stride)); // i = 7
|
||||
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b);
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b);
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 4 * i4_dst_stride), src4_16x8b);
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 5 * i4_dst_stride), src5_16x8b);
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 6 * i4_dst_stride), src6_16x8b);
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 7 * i4_dst_stride), src7_16x8b);
|
||||
|
||||
pu1_src += 8;
|
||||
pu1_dst += 8;
|
||||
}
|
||||
|
||||
pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride;
|
||||
pu1_dst = pu1_dst - i4_blk_wd + 8 * i4_dst_stride;
|
||||
}
|
||||
}
|
||||
else /* ht multiple of 4 case */
|
||||
{
|
||||
for(i = 0; i < i4_blk_ht; i += 4)
|
||||
{
|
||||
for(j = 0; j < i4_blk_wd; j += 8)
|
||||
{
|
||||
src0_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0
|
||||
src1_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1
|
||||
src2_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2
|
||||
src3_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3
|
||||
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b);
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b);
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b);
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b);
|
||||
|
||||
pu1_src += 8;
|
||||
pu1_dst += 8;
|
||||
}
|
||||
|
||||
pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride;
|
||||
pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
else /* wd multiple of 4 case */
|
||||
{
|
||||
__m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b;
|
||||
WORD32 src0, src1, src2, src3;
|
||||
if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */
|
||||
{
|
||||
__m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b;
|
||||
WORD32 src4, src5, src6, src7;
|
||||
|
||||
for(i = 0; i < i4_blk_ht; i += 8)
|
||||
{
|
||||
for(j = 0; j < i4_blk_wd; j += 4)
|
||||
{
|
||||
src0_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0
|
||||
src1_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1
|
||||
src2_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2
|
||||
src3_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3
|
||||
src4_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 4 * i4_src_stride)); // i = 4
|
||||
src5_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 5 * i4_src_stride)); // i = 5
|
||||
src6_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 6 * i4_src_stride)); // i = 6
|
||||
src7_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 7 * i4_src_stride)); // i = 7
|
||||
|
||||
src0 = _mm_cvtsi128_si32(src0_16x8b);
|
||||
src1 = _mm_cvtsi128_si32(src1_16x8b);
|
||||
src2 = _mm_cvtsi128_si32(src2_16x8b);
|
||||
src3 = _mm_cvtsi128_si32(src3_16x8b);
|
||||
src4 = _mm_cvtsi128_si32(src4_16x8b);
|
||||
src5 = _mm_cvtsi128_si32(src5_16x8b);
|
||||
src6 = _mm_cvtsi128_si32(src6_16x8b);
|
||||
src7 = _mm_cvtsi128_si32(src7_16x8b);
|
||||
|
||||
*(WORD32 *) (&pu1_dst[0 * i4_dst_stride]) = src0;
|
||||
*(WORD32 *) (&pu1_dst[1 * i4_dst_stride]) = src1;
|
||||
*(WORD32 *) (&pu1_dst[2 * i4_dst_stride]) = src2;
|
||||
*(WORD32 *) (&pu1_dst[3 * i4_dst_stride]) = src3;
|
||||
*(WORD32 *) (&pu1_dst[4 * i4_dst_stride]) = src4;
|
||||
*(WORD32 *) (&pu1_dst[5 * i4_dst_stride]) = src5;
|
||||
*(WORD32 *) (&pu1_dst[6 * i4_dst_stride]) = src6;
|
||||
*(WORD32 *) (&pu1_dst[7 * i4_dst_stride]) = src7;
|
||||
|
||||
pu1_src += 4;
|
||||
pu1_dst += 4;
|
||||
}
|
||||
|
||||
pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride;
|
||||
pu1_dst = pu1_dst - i4_blk_wd + 8 * i4_dst_stride;
|
||||
}
|
||||
}
|
||||
else /* ht multiple of 4 case */
|
||||
{
|
||||
for(i = 0; i < i4_blk_ht; i += 4)
|
||||
{
|
||||
for(j = 0; j < i4_blk_wd; j += 4)
|
||||
{
|
||||
src0_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0
|
||||
src1_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1
|
||||
src2_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2
|
||||
src3_16x8b =
|
||||
_mm_loadl_epi64((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3
|
||||
|
||||
src0 = _mm_cvtsi128_si32(src0_16x8b);
|
||||
src1 = _mm_cvtsi128_si32(src1_16x8b);
|
||||
src2 = _mm_cvtsi128_si32(src2_16x8b);
|
||||
src3 = _mm_cvtsi128_si32(src3_16x8b);
|
||||
|
||||
*(WORD32 *) (&pu1_dst[0 * i4_dst_stride]) = src0;
|
||||
*(WORD32 *) (&pu1_dst[1 * i4_dst_stride]) = src1;
|
||||
*(WORD32 *) (&pu1_dst[2 * i4_dst_stride]) = src2;
|
||||
*(WORD32 *) (&pu1_dst[3 * i4_dst_stride]) = src3;
|
||||
|
||||
pu1_src += 4;
|
||||
pu1_dst += 4;
|
||||
}
|
||||
|
||||
pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride;
|
||||
pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
294
common/x86/svc/isvc_padding_ssse3.c
Normal file
294
common/x86/svc/isvc_padding_ssse3.c
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvc_padding_ssse3.c
|
||||
*
|
||||
* @brief
|
||||
* Contains function definitions for Padding
|
||||
*
|
||||
* @author
|
||||
* Srinivas T
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvc_pad_left_luma_ssse3()
|
||||
* - isvc_pad_left_chroma_ssse3()
|
||||
* - isvc_pad_right_luma_ssse3()
|
||||
* - isvc_pad_right_chroma_ssse3()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
#include "ih264_debug.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Padding (luma block) at the left of a 2d array
|
||||
*
|
||||
* @par Description:
|
||||
* The left column of a 2d array is replicated for pad_size times at the left
|
||||
*
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* UWORD8 pointer to the source
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* integer source stride
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array
|
||||
*
|
||||
* @param[in] pad_size
|
||||
* integer -padding size of the array
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvc_pad_left_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
|
||||
{
|
||||
WORD32 row;
|
||||
WORD32 i;
|
||||
UWORD8 *pu1_dst;
|
||||
|
||||
ASSERT(pad_size % 8 == 0);
|
||||
|
||||
for(row = 0; row < ht; row++)
|
||||
{
|
||||
__m128i src_temp0_16x8b;
|
||||
|
||||
pu1_dst = pu1_src - pad_size;
|
||||
src_temp0_16x8b = _mm_set1_epi8(*pu1_src);
|
||||
for(i = 0; i < pad_size; i += 8)
|
||||
{
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + i), src_temp0_16x8b);
|
||||
}
|
||||
pu1_src += src_strd;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Padding (chroma block) at the left of a 2d array
|
||||
*
|
||||
* @par Description:
|
||||
* The left column of a 2d array is replicated for pad_size times at the left
|
||||
*
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* UWORD8 pointer to the source
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* integer source stride
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array (each colour component)
|
||||
*
|
||||
* @param[in] pad_size
|
||||
* integer -padding size of the array
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvc_pad_left_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
|
||||
{
|
||||
WORD32 row;
|
||||
WORD32 col;
|
||||
UWORD8 *pu1_dst;
|
||||
|
||||
ASSERT(pad_size % 8 == 0);
|
||||
for(row = 0; row < ht; row++)
|
||||
{
|
||||
__m128i src_temp0_16x8b;
|
||||
|
||||
pu1_dst = pu1_src - pad_size;
|
||||
src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *) pu1_src));
|
||||
for(col = 0; col < pad_size; col += 8)
|
||||
{
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + col), src_temp0_16x8b);
|
||||
}
|
||||
pu1_src += src_strd;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Padding (luma block) at the right of a 2d array
|
||||
*
|
||||
* @par Description:
|
||||
* The right column of a 2d array is replicated for pad_size times at the right
|
||||
*
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* UWORD8 pointer to the source
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* integer source stride
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array
|
||||
*
|
||||
* @param[in] pad_size
|
||||
* integer -padding size of the array
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvc_pad_right_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
|
||||
{
|
||||
WORD32 row;
|
||||
WORD32 col;
|
||||
UWORD8 *pu1_dst;
|
||||
|
||||
ASSERT(pad_size % 8 == 0);
|
||||
|
||||
for(row = 0; row < ht; row++)
|
||||
{
|
||||
__m128i src_temp0_16x8b;
|
||||
|
||||
pu1_dst = pu1_src;
|
||||
src_temp0_16x8b = _mm_set1_epi8(*(pu1_src - 1));
|
||||
for(col = 0; col < pad_size; col += 8)
|
||||
{
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + col), src_temp0_16x8b);
|
||||
}
|
||||
pu1_src += src_strd;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Padding (chroma block) at the right of a 2d array
|
||||
*
|
||||
* @par Description:
|
||||
* The right column of a 2d array is replicated for pad_size times at the right
|
||||
*
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* UWORD8 pointer to the source
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* integer source stride
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array (each colour component)
|
||||
*
|
||||
* @param[in] pad_size
|
||||
* integer -padding size of the array
|
||||
*
|
||||
* @param[in] ht
|
||||
* integer height of the array
|
||||
*
|
||||
* @param[in] wd
|
||||
* integer width of the array
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvc_pad_right_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
|
||||
{
|
||||
WORD32 row;
|
||||
WORD32 col;
|
||||
UWORD8 *pu1_dst;
|
||||
|
||||
ASSERT(pad_size % 8 == 0);
|
||||
|
||||
for(row = 0; row < ht; row++)
|
||||
{
|
||||
__m128i src_temp0_16x8b;
|
||||
|
||||
pu1_dst = pu1_src;
|
||||
src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *) (pu1_src - 2)));
|
||||
for(col = 0; col < pad_size; col += 8)
|
||||
{
|
||||
_mm_storel_epi64((__m128i *) (pu1_dst + col), src_temp0_16x8b);
|
||||
}
|
||||
|
||||
pu1_src += src_strd;
|
||||
}
|
||||
}
|
||||
1881
common/x86/svc/isvc_resi_trans_quant_sse42.c
Normal file
1881
common/x86/svc/isvc_resi_trans_quant_sse42.c
Normal file
File diff suppressed because it is too large
Load diff
927
encoder/arm/svc/isvce_downscaler_neon.c
Normal file
927
encoder/arm/svc/isvce_downscaler_neon.c
Normal file
|
|
@ -0,0 +1,927 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file isvce_downscaler_neon.c
|
||||
*
|
||||
* @brief
|
||||
* This file contains the ARM NEON SIMD version of the function which does
|
||||
* horizontal scaling and transpose
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_horizontal_downscale_and_transpose_neon()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* System include files */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* User include files */
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_macros.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_downscaler_private_defs.h"
|
||||
|
||||
void isvce_horizontal_downscale_and_transpose_neon(
|
||||
downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst,
|
||||
FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma)
|
||||
{
|
||||
WORD32 i, j;
|
||||
UWORD8 u1_phase;
|
||||
UWORD8 *pu1_src_j, *pu1_dst_j;
|
||||
UWORD8 *pu1_in_pixel;
|
||||
UWORD8 *pu1_out_pixel;
|
||||
WORD8 *pi1_filter_grid;
|
||||
UWORD16 u2_full_pixel_inc;
|
||||
UWORD32 u4_num_iterations_vertical_by_16, u4_num_iterations_vertical_by_8;
|
||||
UWORD32 u4_rem_vert_loop_by_8, u4_rem_vert_loop_by_4;
|
||||
UWORD32 u4_rem_vert_loop;
|
||||
UWORD32 u4_height_finished;
|
||||
|
||||
uint8x8_t reg_8x8_src_r0, reg_8x8_src_r1, reg_8x8_src_r2, reg_8x8_src_r3, reg_8x8_src_r4,
|
||||
reg_8x8_src_r5, reg_8x8_src_r6, reg_8x8_src_r7;
|
||||
|
||||
uint16x8_t reg_16x8_src_r0, reg_16x8_src_r1, reg_16x8_src_r2, reg_16x8_src_r3, reg_16x8_src_r4,
|
||||
reg_16x8_src_r5, reg_16x8_src_r6, reg_16x8_src_r7;
|
||||
|
||||
int16x8_t reg_16x8_mul_r0, reg_16x8_mul_r1, reg_16x8_mul_r2, reg_16x8_mul_r3, reg_16x8_mul_r4,
|
||||
reg_16x8_mul_r5, reg_16x8_mul_r6, reg_16x8_mul_r7;
|
||||
|
||||
int32x4_t reg_32x4_sum_r0, reg_32x4_sum_r1, reg_32x4_sum_r2, reg_32x4_sum_r3, reg_32x4_sum_r4,
|
||||
reg_32x4_sum_r5, reg_32x4_sum_r6, reg_32x4_sum_r7;
|
||||
|
||||
int32x4_t reg_32x4_sum_r01, reg_32x4_sum_r23, reg_32x4_sum_r45, reg_32x4_sum_r67,
|
||||
reg_32x4_sum_r89, reg_32x4_sum_r1011, reg_32x4_sum_r1213, reg_32x4_sum_r1415;
|
||||
|
||||
uint8x8_t reg_8x8_src_r8, reg_8x8_src_r9, reg_8x8_src_r10, reg_8x8_src_r11, reg_8x8_src_r12,
|
||||
reg_8x8_src_r13, reg_8x8_src_r14, reg_8x8_src_r15;
|
||||
|
||||
uint16x8_t reg_16x8_src_r8, reg_16x8_src_r9, reg_16x8_src_r10, reg_16x8_src_r11,
|
||||
reg_16x8_src_r12, reg_16x8_src_r13, reg_16x8_src_r14, reg_16x8_src_r15;
|
||||
|
||||
int16x8_t reg_16x8_mul_r8, reg_16x8_mul_r9, reg_16x8_mul_r10, reg_16x8_mul_r11,
|
||||
reg_16x8_mul_r12, reg_16x8_mul_r13, reg_16x8_mul_r14, reg_16x8_mul_r15;
|
||||
|
||||
int32x4_t reg_32x4_sum_r8, reg_32x4_sum_r9, reg_32x4_sum_r10, reg_32x4_sum_r11,
|
||||
reg_32x4_sum_r12, reg_32x4_sum_r13, reg_32x4_sum_r14, reg_32x4_sum_r15;
|
||||
|
||||
uint8x16_t reg_8x16_src_r0, reg_8x16_src_r1, reg_8x16_src_r2, reg_8x16_src_r3, reg_8x16_src_r4,
|
||||
reg_8x16_src_r5, reg_8x16_src_r6, reg_8x16_src_r7;
|
||||
|
||||
uint16x8_t reg_16x8_src_cb_r0, reg_16x8_src_cb_r1, reg_16x8_src_cb_r2, reg_16x8_src_cb_r3,
|
||||
reg_16x8_src_cb_r4, reg_16x8_src_cb_r5, reg_16x8_src_cb_r6, reg_16x8_src_cb_r7;
|
||||
|
||||
uint16x8_t reg_16x8_src_cr_r0, reg_16x8_src_cr_r1, reg_16x8_src_cr_r2, reg_16x8_src_cr_r3,
|
||||
reg_16x8_src_cr_r4, reg_16x8_src_cr_r5, reg_16x8_src_cr_r6, reg_16x8_src_cr_r7;
|
||||
|
||||
int16x8_t reg_16x8_mul_cb_r0, reg_16x8_mul_cb_r1, reg_16x8_mul_cb_r2, reg_16x8_mul_cb_r3,
|
||||
reg_16x8_mul_cb_r4, reg_16x8_mul_cb_r5, reg_16x8_mul_cb_r6, reg_16x8_mul_cb_r7;
|
||||
|
||||
int16x8_t reg_16x8_mul_cr_r0, reg_16x8_mul_cr_r1, reg_16x8_mul_cr_r2, reg_16x8_mul_cr_r3,
|
||||
reg_16x8_mul_cr_r4, reg_16x8_mul_cr_r5, reg_16x8_mul_cr_r6, reg_16x8_mul_cr_r7;
|
||||
|
||||
int32x4_t reg_32x4_sum_cb_r0, reg_32x4_sum_cb_r1, reg_32x4_sum_cb_r2, reg_32x4_sum_cb_r3,
|
||||
reg_32x4_sum_cb_r4, reg_32x4_sum_cb_r5, reg_32x4_sum_cb_r6, reg_32x4_sum_cb_r7;
|
||||
|
||||
int32x4_t reg_32x4_sum_cr_r0, reg_32x4_sum_cr_r1, reg_32x4_sum_cr_r2, reg_32x4_sum_cr_r3,
|
||||
reg_32x4_sum_cr_r4, reg_32x4_sum_cr_r5, reg_32x4_sum_cr_r6, reg_32x4_sum_cr_r7;
|
||||
|
||||
int32x4_t reg_32x4_sum_cb_r01, reg_32x4_sum_cb_r23, reg_32x4_sum_cb_r45, reg_32x4_sum_cb_r67;
|
||||
uint16x4_t reg_16x4_sum_cb_r01_23, reg_16x4_sum_cb_r45_67;
|
||||
uint16x8_t reg_16x8_sum_cb_r0_r7;
|
||||
uint8x8_t reg_8x8_sum_cb_r0_r7;
|
||||
|
||||
int32x4_t reg_32x4_sum_cr_r01, reg_32x4_sum_cr_r23, reg_32x4_sum_cr_r45, reg_32x4_sum_cr_r67;
|
||||
uint16x4_t reg_16x4_sum_cr_r01_23, reg_16x4_sum_cr_r45_67;
|
||||
uint16x8_t reg_16x8_sum_cr_r0_r7;
|
||||
uint8x8_t reg_8x8_sum_cr_r0_r7;
|
||||
uint16x8_t reg_16x8_sum_cb_cr_r0_r3;
|
||||
uint8x8_t reg_8x8_sum_cb_cr_r0_r3;
|
||||
|
||||
int32x4_t reg_32x4_sum_cb_cr_r0;
|
||||
uint16x4_t reg_16x4_sum_cb_cr_r0;
|
||||
|
||||
int32x4_t reg_32x4_zero = vdupq_n_s32(0);
|
||||
|
||||
uint16x4_t reg_16x4_sum_r01_23, reg_16x4_sum_r45_67;
|
||||
uint16x4_t reg_16x4_sum_r8_r11, reg_16x4_sum_r12_r15;
|
||||
uint16x8_t reg_16x8_sum_r0_r7, reg_16x8_sum_r8_r15;
|
||||
uint8x8_t reg_8x8_sum_r0_r7, reg_8x8_sum_r8_r15;
|
||||
uint8x16_t reg_8x16_sum_r0_r15;
|
||||
int8x8_t reg_8x8_filt_coeff_grid;
|
||||
int16x8_t reg_16x8_filt_coeff_grid;
|
||||
int32x4x2_t reg_32x4x2_sum_r01, reg_32x4x2_sum_r23, reg_32x4x2_sum_r45, reg_32x4x2_sum_r67;
|
||||
int32x4x2_t reg_32x4x2_sum_r89, reg_32x4x2_sum_r1011, reg_32x4x2_sum_r1213,
|
||||
reg_32x4x2_sum_r1415;
|
||||
uint8x16x2_t reg_8x16x2_src_r0, reg_8x16x2_src_r1, reg_8x16x2_src_r2, reg_8x16x2_src_r3;
|
||||
|
||||
downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
|
||||
|
||||
UWORD32 u4_center_pixel_pos = ps_scaler_state->i4_init_offset;
|
||||
UWORD32 u4_src_vert_increments = ps_scaler_state->u4_vert_increment;
|
||||
UWORD32 u4_src_horz_increments = ps_scaler_state->u4_horz_increment;
|
||||
UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
|
||||
UWORD32 u4_in_stride = ps_src->i4_data_stride;
|
||||
UWORD8 *pu1_dst = (UWORD8 *) ps_dst->pv_data;
|
||||
UWORD32 u4_out_stride = ps_dst->i4_data_stride;
|
||||
UWORD32 u4_center_pixel_pos_src = u4_center_pixel_pos;
|
||||
|
||||
/* Offset the input so that the input pixel to be processed
|
||||
co-incides with the centre of filter (4th coefficient)*/
|
||||
pu1_src += (1 + u1_is_chroma);
|
||||
|
||||
ASSERT((1 << DOWNSCALER_Q) == u4_src_vert_increments);
|
||||
|
||||
if(!u1_is_chroma)
|
||||
{
|
||||
u4_num_iterations_vertical_by_16 = u4_blk_ht >> 4;
|
||||
u4_rem_vert_loop = u4_blk_ht % 16;
|
||||
|
||||
for(j = 0; j < (WORD32) u4_num_iterations_vertical_by_16; j++)
|
||||
{
|
||||
pu1_src_j = pu1_src + ((j << 4) * u4_in_stride);
|
||||
pu1_dst_j = pu1_dst + (j << 4);
|
||||
|
||||
u4_center_pixel_pos = u4_center_pixel_pos_src;
|
||||
|
||||
for(i = 0; i < (WORD32) u4_blk_wd; i++)
|
||||
{
|
||||
u1_phase = get_filter_phase(u4_center_pixel_pos);
|
||||
|
||||
pi1_filter_grid = pai1_filters[u1_phase];
|
||||
|
||||
/* Doing the Calculation for current Loop Count */
|
||||
u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
|
||||
|
||||
pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
|
||||
|
||||
pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
|
||||
|
||||
reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
|
||||
|
||||
/******************************************************/
|
||||
/* This loop is going vertically in bottom direction */
|
||||
/* but the output pixels are stored in horizontal */
|
||||
/* direction in transpose manner */
|
||||
/******************************************************/
|
||||
|
||||
/* r0-r7 */
|
||||
reg_8x8_src_r0 = vld1_u8(pu1_in_pixel);
|
||||
reg_8x8_src_r1 = vld1_u8(pu1_in_pixel + u4_in_stride);
|
||||
reg_8x8_src_r2 = vld1_u8(pu1_in_pixel + 2 * u4_in_stride);
|
||||
reg_8x8_src_r3 = vld1_u8(pu1_in_pixel + 3 * u4_in_stride);
|
||||
reg_8x8_src_r4 = vld1_u8(pu1_in_pixel + 4 * u4_in_stride);
|
||||
reg_8x8_src_r5 = vld1_u8(pu1_in_pixel + 5 * u4_in_stride);
|
||||
reg_8x8_src_r6 = vld1_u8(pu1_in_pixel + 6 * u4_in_stride);
|
||||
reg_8x8_src_r7 = vld1_u8(pu1_in_pixel + 7 * u4_in_stride);
|
||||
|
||||
/* r0-r7 */
|
||||
reg_16x8_src_r0 = vmovl_u8(reg_8x8_src_r0);
|
||||
reg_16x8_src_r1 = vmovl_u8(reg_8x8_src_r1);
|
||||
reg_16x8_src_r2 = vmovl_u8(reg_8x8_src_r2);
|
||||
reg_16x8_src_r3 = vmovl_u8(reg_8x8_src_r3);
|
||||
reg_16x8_src_r4 = vmovl_u8(reg_8x8_src_r4);
|
||||
reg_16x8_src_r5 = vmovl_u8(reg_8x8_src_r5);
|
||||
reg_16x8_src_r6 = vmovl_u8(reg_8x8_src_r6);
|
||||
reg_16x8_src_r7 = vmovl_u8(reg_8x8_src_r7);
|
||||
|
||||
/* r8-r15 */
|
||||
reg_8x8_src_r8 = vld1_u8(pu1_in_pixel + 8 * u4_in_stride);
|
||||
reg_8x8_src_r9 = vld1_u8(pu1_in_pixel + 9 * u4_in_stride);
|
||||
reg_8x8_src_r10 = vld1_u8(pu1_in_pixel + 10 * u4_in_stride);
|
||||
reg_8x8_src_r11 = vld1_u8(pu1_in_pixel + 11 * u4_in_stride);
|
||||
reg_8x8_src_r12 = vld1_u8(pu1_in_pixel + 12 * u4_in_stride);
|
||||
reg_8x8_src_r13 = vld1_u8(pu1_in_pixel + 13 * u4_in_stride);
|
||||
reg_8x8_src_r14 = vld1_u8(pu1_in_pixel + 14 * u4_in_stride);
|
||||
reg_8x8_src_r15 = vld1_u8(pu1_in_pixel + 15 * u4_in_stride);
|
||||
|
||||
reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
|
||||
|
||||
/*r0-r7 */
|
||||
reg_16x8_mul_r0 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r0), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r1 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r1), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r2 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r2), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r3 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r3), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r4 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r4), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r5 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r5), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r6 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r6), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r7 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r7), reg_16x8_filt_coeff_grid);
|
||||
|
||||
/* r8-r15 */
|
||||
reg_16x8_src_r8 = vmovl_u8(reg_8x8_src_r8);
|
||||
reg_16x8_src_r9 = vmovl_u8(reg_8x8_src_r9);
|
||||
reg_16x8_src_r10 = vmovl_u8(reg_8x8_src_r10);
|
||||
reg_16x8_src_r11 = vmovl_u8(reg_8x8_src_r11);
|
||||
reg_16x8_src_r12 = vmovl_u8(reg_8x8_src_r12);
|
||||
reg_16x8_src_r13 = vmovl_u8(reg_8x8_src_r13);
|
||||
reg_16x8_src_r14 = vmovl_u8(reg_8x8_src_r14);
|
||||
reg_16x8_src_r15 = vmovl_u8(reg_8x8_src_r15);
|
||||
|
||||
/* r0-r7 */
|
||||
reg_32x4_sum_r0 = vpaddlq_s16(reg_16x8_mul_r0);
|
||||
reg_32x4_sum_r1 = vpaddlq_s16(reg_16x8_mul_r1);
|
||||
reg_32x4_sum_r2 = vpaddlq_s16(reg_16x8_mul_r2);
|
||||
reg_32x4_sum_r3 = vpaddlq_s16(reg_16x8_mul_r3);
|
||||
reg_32x4_sum_r4 = vpaddlq_s16(reg_16x8_mul_r4);
|
||||
reg_32x4_sum_r5 = vpaddlq_s16(reg_16x8_mul_r5);
|
||||
reg_32x4_sum_r6 = vpaddlq_s16(reg_16x8_mul_r6);
|
||||
reg_32x4_sum_r7 = vpaddlq_s16(reg_16x8_mul_r7);
|
||||
|
||||
/* r8-r15 */
|
||||
reg_16x8_mul_r8 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r8), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r9 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r9), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r10 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r10), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r11 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r11), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r12 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r12), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r13 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r13), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r14 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r14), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r15 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r15), reg_16x8_filt_coeff_grid);
|
||||
|
||||
/* r0-r7 */
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r0, reg_32x4_sum_r1);
|
||||
reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_r2, reg_32x4_sum_r3);
|
||||
reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r4, reg_32x4_sum_r5);
|
||||
reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_r6, reg_32x4_sum_r7);
|
||||
|
||||
reg_32x4_sum_r01 = vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4_sum_r23 = vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
|
||||
reg_32x4_sum_r45 = vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
|
||||
reg_32x4_sum_r67 = vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]);
|
||||
|
||||
/* r8-r15 */
|
||||
reg_32x4_sum_r8 = vpaddlq_s16(reg_16x8_mul_r8);
|
||||
reg_32x4_sum_r9 = vpaddlq_s16(reg_16x8_mul_r9);
|
||||
reg_32x4_sum_r10 = vpaddlq_s16(reg_16x8_mul_r10);
|
||||
reg_32x4_sum_r11 = vpaddlq_s16(reg_16x8_mul_r11);
|
||||
reg_32x4_sum_r12 = vpaddlq_s16(reg_16x8_mul_r12);
|
||||
reg_32x4_sum_r13 = vpaddlq_s16(reg_16x8_mul_r13);
|
||||
reg_32x4_sum_r14 = vpaddlq_s16(reg_16x8_mul_r14);
|
||||
reg_32x4_sum_r15 = vpaddlq_s16(reg_16x8_mul_r15);
|
||||
|
||||
/* r0-r7 */
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r01, reg_32x4_sum_r23);
|
||||
reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r45, reg_32x4_sum_r67);
|
||||
reg_32x4_sum_r01 = vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4_sum_r45 = vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
|
||||
|
||||
/* r8-r15 */
|
||||
reg_32x4x2_sum_r89 = vuzpq_s32(reg_32x4_sum_r8, reg_32x4_sum_r9);
|
||||
reg_32x4x2_sum_r1011 = vuzpq_s32(reg_32x4_sum_r10, reg_32x4_sum_r11);
|
||||
reg_32x4x2_sum_r1213 = vuzpq_s32(reg_32x4_sum_r12, reg_32x4_sum_r13);
|
||||
reg_32x4x2_sum_r1415 = vuzpq_s32(reg_32x4_sum_r14, reg_32x4_sum_r15);
|
||||
|
||||
reg_32x4_sum_r89 = vaddq_s32(reg_32x4x2_sum_r89.val[0], reg_32x4x2_sum_r89.val[1]);
|
||||
reg_32x4_sum_r1011 =
|
||||
vaddq_s32(reg_32x4x2_sum_r1011.val[0], reg_32x4x2_sum_r1011.val[1]);
|
||||
reg_32x4_sum_r1213 =
|
||||
vaddq_s32(reg_32x4x2_sum_r1213.val[0], reg_32x4x2_sum_r1213.val[1]);
|
||||
reg_32x4_sum_r1415 =
|
||||
vaddq_s32(reg_32x4x2_sum_r1415.val[0], reg_32x4x2_sum_r1415.val[1]);
|
||||
|
||||
/* r0-r7 */
|
||||
reg_16x4_sum_r01_23 = vqrshrun_n_s32(reg_32x4_sum_r01, 7);
|
||||
reg_16x4_sum_r45_67 = vqrshrun_n_s32(reg_32x4_sum_r45, 7);
|
||||
|
||||
/* r8-r15 */
|
||||
reg_32x4x2_sum_r89 = vuzpq_s32(reg_32x4_sum_r89, reg_32x4_sum_r1011);
|
||||
reg_32x4x2_sum_r1213 = vuzpq_s32(reg_32x4_sum_r1213, reg_32x4_sum_r1415);
|
||||
reg_32x4_sum_r89 = vaddq_s32(reg_32x4x2_sum_r89.val[0], reg_32x4x2_sum_r89.val[1]);
|
||||
reg_32x4_sum_r1213 =
|
||||
vaddq_s32(reg_32x4x2_sum_r1213.val[0], reg_32x4x2_sum_r1213.val[1]);
|
||||
|
||||
/* r0-r7 */
|
||||
reg_16x8_sum_r0_r7 = vcombine_u16(reg_16x4_sum_r01_23, reg_16x4_sum_r45_67);
|
||||
reg_8x8_sum_r0_r7 = vqmovn_u16(reg_16x8_sum_r0_r7);
|
||||
|
||||
reg_16x4_sum_r8_r11 = vqrshrun_n_s32(reg_32x4_sum_r89, 7);
|
||||
reg_16x4_sum_r12_r15 = vqrshrun_n_s32(reg_32x4_sum_r1213, 7);
|
||||
|
||||
reg_16x8_sum_r8_r15 = vcombine_u16(reg_16x4_sum_r8_r11, reg_16x4_sum_r12_r15);
|
||||
reg_8x8_sum_r8_r15 = vqmovn_u16(reg_16x8_sum_r8_r15);
|
||||
|
||||
reg_8x16_sum_r0_r15 = vcombine_u8(reg_8x8_sum_r0_r7, reg_8x8_sum_r8_r15);
|
||||
|
||||
/* r0-r7 */
|
||||
vst1q_u8(pu1_out_pixel, reg_8x16_sum_r0_r15);
|
||||
|
||||
pu1_out_pixel += 16;
|
||||
pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride << 4)) >> DOWNSCALER_Q;
|
||||
|
||||
/* Update the context for next Loop Count */
|
||||
u4_center_pixel_pos += u4_src_horz_increments;
|
||||
}
|
||||
}
|
||||
|
||||
/* Loop for the remaining height less than 16 */
|
||||
if(u4_rem_vert_loop)
|
||||
{
|
||||
u4_rem_vert_loop_by_8 = u4_rem_vert_loop >> 3;
|
||||
u4_rem_vert_loop = u4_rem_vert_loop % 8;
|
||||
|
||||
u4_height_finished = (u4_num_iterations_vertical_by_16 << 4);
|
||||
|
||||
pu1_src_j = pu1_src + ((u4_height_finished) *u4_in_stride);
|
||||
pu1_dst_j = pu1_dst + u4_height_finished;
|
||||
|
||||
u4_center_pixel_pos = u4_center_pixel_pos_src;
|
||||
|
||||
/* 8 <= remaining height < 16 */
|
||||
if(u4_rem_vert_loop_by_8)
|
||||
{
|
||||
for(i = 0; i < (WORD32) u4_blk_wd; i++)
|
||||
{
|
||||
u1_phase = get_filter_phase(u4_center_pixel_pos);
|
||||
pi1_filter_grid = pai1_filters[u1_phase];
|
||||
|
||||
u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
|
||||
|
||||
pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
|
||||
|
||||
pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
|
||||
|
||||
reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
|
||||
|
||||
for(j = u4_rem_vert_loop_by_8; j > 0; j--)
|
||||
{
|
||||
/******************************************************/
|
||||
/* This loop is going vertically in bottom direction */
|
||||
/* but the output pixels are stored in horizontal */
|
||||
/* direction in transpose manner */
|
||||
/******************************************************/
|
||||
|
||||
reg_8x8_src_r0 = vld1_u8(pu1_in_pixel);
|
||||
reg_8x8_src_r1 = vld1_u8(pu1_in_pixel + u4_in_stride);
|
||||
reg_8x8_src_r2 = vld1_u8(pu1_in_pixel + 2 * u4_in_stride);
|
||||
reg_8x8_src_r3 = vld1_u8(pu1_in_pixel + 3 * u4_in_stride);
|
||||
reg_8x8_src_r4 = vld1_u8(pu1_in_pixel + 4 * u4_in_stride);
|
||||
reg_8x8_src_r5 = vld1_u8(pu1_in_pixel + 5 * u4_in_stride);
|
||||
reg_8x8_src_r6 = vld1_u8(pu1_in_pixel + 6 * u4_in_stride);
|
||||
reg_8x8_src_r7 = vld1_u8(pu1_in_pixel + 7 * u4_in_stride);
|
||||
|
||||
reg_16x8_src_r0 = vmovl_u8(reg_8x8_src_r0);
|
||||
reg_16x8_src_r1 = vmovl_u8(reg_8x8_src_r1);
|
||||
reg_16x8_src_r2 = vmovl_u8(reg_8x8_src_r2);
|
||||
reg_16x8_src_r3 = vmovl_u8(reg_8x8_src_r3);
|
||||
reg_16x8_src_r4 = vmovl_u8(reg_8x8_src_r4);
|
||||
reg_16x8_src_r5 = vmovl_u8(reg_8x8_src_r5);
|
||||
reg_16x8_src_r6 = vmovl_u8(reg_8x8_src_r6);
|
||||
reg_16x8_src_r7 = vmovl_u8(reg_8x8_src_r7);
|
||||
reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
|
||||
|
||||
reg_16x8_mul_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r0),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r1 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r1),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r2 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r2),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r3 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r3),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r4 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r4),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r5 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r5),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r6 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r6),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_r7 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r7),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
|
||||
reg_32x4_sum_r0 = vpaddlq_s16(reg_16x8_mul_r0);
|
||||
reg_32x4_sum_r1 = vpaddlq_s16(reg_16x8_mul_r1);
|
||||
reg_32x4_sum_r2 = vpaddlq_s16(reg_16x8_mul_r2);
|
||||
reg_32x4_sum_r3 = vpaddlq_s16(reg_16x8_mul_r3);
|
||||
reg_32x4_sum_r4 = vpaddlq_s16(reg_16x8_mul_r4);
|
||||
reg_32x4_sum_r5 = vpaddlq_s16(reg_16x8_mul_r5);
|
||||
reg_32x4_sum_r6 = vpaddlq_s16(reg_16x8_mul_r6);
|
||||
reg_32x4_sum_r7 = vpaddlq_s16(reg_16x8_mul_r7);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r0, reg_32x4_sum_r1);
|
||||
reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_r2, reg_32x4_sum_r3);
|
||||
reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r4, reg_32x4_sum_r5);
|
||||
reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_r6, reg_32x4_sum_r7);
|
||||
|
||||
reg_32x4_sum_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4_sum_r23 =
|
||||
vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
|
||||
reg_32x4_sum_r45 =
|
||||
vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
|
||||
reg_32x4_sum_r67 =
|
||||
vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r01, reg_32x4_sum_r23);
|
||||
reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r45, reg_32x4_sum_r67);
|
||||
reg_32x4_sum_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4_sum_r45 =
|
||||
vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
|
||||
|
||||
reg_16x4_sum_r01_23 = vqrshrun_n_s32(reg_32x4_sum_r01, 7);
|
||||
reg_16x4_sum_r45_67 = vqrshrun_n_s32(reg_32x4_sum_r45, 7);
|
||||
|
||||
reg_16x8_sum_r0_r7 = vcombine_u16(reg_16x4_sum_r01_23, reg_16x4_sum_r45_67);
|
||||
reg_8x8_sum_r0_r7 = vqmovn_u16(reg_16x8_sum_r0_r7);
|
||||
|
||||
vst1_u8(pu1_out_pixel, reg_8x8_sum_r0_r7);
|
||||
|
||||
pu1_out_pixel += 8;
|
||||
pu1_in_pixel +=
|
||||
(u4_src_vert_increments * (u4_in_stride << 3)) >> DOWNSCALER_Q;
|
||||
}
|
||||
/* Update the context for next Loop Count */
|
||||
u4_center_pixel_pos += u4_src_horz_increments;
|
||||
}
|
||||
}
|
||||
|
||||
/* 1 <= remaining height < 8 */
|
||||
if(u4_rem_vert_loop)
|
||||
{
|
||||
u4_height_finished =
|
||||
((u4_num_iterations_vertical_by_16 << 4) + (u4_rem_vert_loop_by_8 << 3));
|
||||
pu1_src_j = pu1_src + u4_height_finished * u4_in_stride;
|
||||
pu1_dst_j = pu1_dst + u4_height_finished;
|
||||
|
||||
u4_center_pixel_pos = u4_center_pixel_pos_src;
|
||||
|
||||
for(i = 0; i < (WORD32) u4_blk_wd; i++)
|
||||
{
|
||||
u1_phase = get_filter_phase(u4_center_pixel_pos);
|
||||
pi1_filter_grid = pai1_filters[u1_phase];
|
||||
|
||||
u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
|
||||
|
||||
pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
|
||||
|
||||
pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
|
||||
|
||||
reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
|
||||
|
||||
for(j = u4_rem_vert_loop; j > 0; j--)
|
||||
{
|
||||
/******************************************************/
|
||||
/* This loop is going vertically in bottom direction */
|
||||
/* but the output pixels are stored in horizontal */
|
||||
/* direction in transpose manner */
|
||||
/******************************************************/
|
||||
|
||||
reg_8x8_src_r0 = vld1_u8(pu1_in_pixel);
|
||||
reg_16x8_src_r0 = vmovl_u8(reg_8x8_src_r0);
|
||||
|
||||
reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
|
||||
|
||||
reg_16x8_mul_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r0),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
|
||||
reg_32x4_sum_r0 = vpaddlq_s16(reg_16x8_mul_r0);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r0, reg_32x4_zero);
|
||||
reg_32x4_sum_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r01, reg_32x4_zero);
|
||||
reg_32x4_sum_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
|
||||
reg_16x4_sum_r01_23 = vqrshrun_n_s32(reg_32x4_sum_r01, 7);
|
||||
|
||||
vst1_lane_u8(pu1_out_pixel, vreinterpret_u8_u16(reg_16x4_sum_r01_23), 0);
|
||||
pu1_out_pixel += 1;
|
||||
pu1_in_pixel += (u4_src_vert_increments * u4_in_stride) >> DOWNSCALER_Q;
|
||||
}
|
||||
/* Update the context for next Loop Count */
|
||||
u4_center_pixel_pos += u4_src_horz_increments;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* for chroma */
|
||||
else
|
||||
{
|
||||
u4_num_iterations_vertical_by_8 = u4_blk_ht >> 3;
|
||||
u4_rem_vert_loop = u4_blk_ht % 8;
|
||||
|
||||
for(j = 0; j < (WORD32) u4_num_iterations_vertical_by_8; j++)
|
||||
{
|
||||
pu1_src_j = pu1_src + ((j << 3) * u4_in_stride);
|
||||
pu1_dst_j = pu1_dst + (j << 3);
|
||||
|
||||
u4_center_pixel_pos = u4_center_pixel_pos_src;
|
||||
|
||||
for(i = 0; i < (WORD32) u4_blk_wd; i++)
|
||||
{
|
||||
u1_phase = get_filter_phase(u4_center_pixel_pos);
|
||||
pi1_filter_grid = pai1_filters[u1_phase];
|
||||
|
||||
/*Doing the Calculation for current Loop Count */
|
||||
u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
|
||||
|
||||
pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
|
||||
|
||||
pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
|
||||
|
||||
reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
|
||||
|
||||
/******************************************************/
|
||||
/* This loop is going vertically in bottom direction */
|
||||
/* but the output pixels are stored in horizontal */
|
||||
/* direction in transpose manner */
|
||||
/******************************************************/
|
||||
|
||||
reg_8x16_src_r0 = vld1q_u8(pu1_in_pixel);
|
||||
reg_8x16_src_r1 = vld1q_u8(pu1_in_pixel + u4_in_stride);
|
||||
reg_8x16_src_r2 = vld1q_u8(pu1_in_pixel + 2 * u4_in_stride);
|
||||
reg_8x16_src_r3 = vld1q_u8(pu1_in_pixel + 3 * u4_in_stride);
|
||||
reg_8x16_src_r4 = vld1q_u8(pu1_in_pixel + 4 * u4_in_stride);
|
||||
reg_8x16_src_r5 = vld1q_u8(pu1_in_pixel + 5 * u4_in_stride);
|
||||
reg_8x16_src_r6 = vld1q_u8(pu1_in_pixel + 6 * u4_in_stride);
|
||||
reg_8x16_src_r7 = vld1q_u8(pu1_in_pixel + 7 * u4_in_stride);
|
||||
|
||||
reg_8x16x2_src_r0 = vuzpq_u8(reg_8x16_src_r0, reg_8x16_src_r1);
|
||||
reg_8x16x2_src_r1 = vuzpq_u8(reg_8x16_src_r2, reg_8x16_src_r3);
|
||||
reg_8x16x2_src_r2 = vuzpq_u8(reg_8x16_src_r4, reg_8x16_src_r5);
|
||||
reg_8x16x2_src_r3 = vuzpq_u8(reg_8x16_src_r6, reg_8x16_src_r7);
|
||||
|
||||
reg_16x8_src_cb_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[0]));
|
||||
reg_16x8_src_cb_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[0]));
|
||||
reg_16x8_src_cb_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[0]));
|
||||
reg_16x8_src_cb_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[0]));
|
||||
reg_16x8_src_cb_r4 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r2.val[0]));
|
||||
reg_16x8_src_cb_r5 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r2.val[0]));
|
||||
reg_16x8_src_cb_r6 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r3.val[0]));
|
||||
reg_16x8_src_cb_r7 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r3.val[0]));
|
||||
|
||||
reg_16x8_src_cr_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[1]));
|
||||
reg_16x8_src_cr_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[1]));
|
||||
reg_16x8_src_cr_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[1]));
|
||||
reg_16x8_src_cr_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[1]));
|
||||
reg_16x8_src_cr_r4 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r2.val[1]));
|
||||
reg_16x8_src_cr_r5 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r2.val[1]));
|
||||
reg_16x8_src_cr_r6 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r3.val[1]));
|
||||
reg_16x8_src_cr_r7 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r3.val[1]));
|
||||
|
||||
reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
|
||||
|
||||
reg_16x8_mul_cb_r0 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r0), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cb_r1 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r1), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cb_r2 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r2), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cb_r3 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r3), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cb_r4 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r4), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cb_r5 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r5), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cb_r6 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r6), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cb_r7 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r7), reg_16x8_filt_coeff_grid);
|
||||
|
||||
reg_16x8_mul_cr_r0 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r0), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r1 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r1), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r2 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r2), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r3 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r3), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r4 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r4), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r5 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r5), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r6 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r6), reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r7 =
|
||||
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r7), reg_16x8_filt_coeff_grid);
|
||||
|
||||
reg_32x4_sum_cb_r0 = vpaddlq_s16(reg_16x8_mul_cb_r0);
|
||||
reg_32x4_sum_cb_r1 = vpaddlq_s16(reg_16x8_mul_cb_r1);
|
||||
reg_32x4_sum_cb_r2 = vpaddlq_s16(reg_16x8_mul_cb_r2);
|
||||
reg_32x4_sum_cb_r3 = vpaddlq_s16(reg_16x8_mul_cb_r3);
|
||||
reg_32x4_sum_cb_r4 = vpaddlq_s16(reg_16x8_mul_cb_r4);
|
||||
reg_32x4_sum_cb_r5 = vpaddlq_s16(reg_16x8_mul_cb_r5);
|
||||
reg_32x4_sum_cb_r6 = vpaddlq_s16(reg_16x8_mul_cb_r6);
|
||||
reg_32x4_sum_cb_r7 = vpaddlq_s16(reg_16x8_mul_cb_r7);
|
||||
|
||||
reg_32x4_sum_cr_r0 = vpaddlq_s16(reg_16x8_mul_cr_r0);
|
||||
reg_32x4_sum_cr_r1 = vpaddlq_s16(reg_16x8_mul_cr_r1);
|
||||
reg_32x4_sum_cr_r2 = vpaddlq_s16(reg_16x8_mul_cr_r2);
|
||||
reg_32x4_sum_cr_r3 = vpaddlq_s16(reg_16x8_mul_cr_r3);
|
||||
reg_32x4_sum_cr_r4 = vpaddlq_s16(reg_16x8_mul_cr_r4);
|
||||
reg_32x4_sum_cr_r5 = vpaddlq_s16(reg_16x8_mul_cr_r5);
|
||||
reg_32x4_sum_cr_r6 = vpaddlq_s16(reg_16x8_mul_cr_r6);
|
||||
reg_32x4_sum_cr_r7 = vpaddlq_s16(reg_16x8_mul_cr_r7);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r0, reg_32x4_sum_cb_r1);
|
||||
reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cb_r2, reg_32x4_sum_cb_r3);
|
||||
reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cb_r4, reg_32x4_sum_cb_r5);
|
||||
reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_cb_r6, reg_32x4_sum_cb_r7);
|
||||
|
||||
reg_32x4_sum_cb_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4_sum_cb_r23 =
|
||||
vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
|
||||
reg_32x4_sum_cb_r45 =
|
||||
vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
|
||||
reg_32x4_sum_cb_r67 =
|
||||
vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r01, reg_32x4_sum_cb_r23);
|
||||
reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cb_r45, reg_32x4_sum_cb_r67);
|
||||
reg_32x4_sum_cb_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4_sum_cb_r45 =
|
||||
vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r0, reg_32x4_sum_cr_r1);
|
||||
reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cr_r2, reg_32x4_sum_cr_r3);
|
||||
reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cr_r4, reg_32x4_sum_cr_r5);
|
||||
reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_cr_r6, reg_32x4_sum_cr_r7);
|
||||
|
||||
reg_32x4_sum_cr_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4_sum_cr_r23 =
|
||||
vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
|
||||
reg_32x4_sum_cr_r45 =
|
||||
vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
|
||||
reg_32x4_sum_cr_r67 =
|
||||
vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r01, reg_32x4_sum_cr_r23);
|
||||
reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cr_r45, reg_32x4_sum_cr_r67);
|
||||
reg_32x4_sum_cr_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4_sum_cr_r45 =
|
||||
vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
|
||||
|
||||
reg_16x4_sum_cb_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cb_r01, 7);
|
||||
reg_16x4_sum_cb_r45_67 = vqrshrun_n_s32(reg_32x4_sum_cb_r45, 7);
|
||||
|
||||
reg_16x4_sum_cr_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cr_r01, 7);
|
||||
reg_16x4_sum_cr_r45_67 = vqrshrun_n_s32(reg_32x4_sum_cr_r45, 7);
|
||||
|
||||
reg_16x8_sum_cb_r0_r7 =
|
||||
vcombine_u16(reg_16x4_sum_cb_r01_23, reg_16x4_sum_cb_r45_67);
|
||||
reg_16x8_sum_cr_r0_r7 =
|
||||
vcombine_u16(reg_16x4_sum_cr_r01_23, reg_16x4_sum_cr_r45_67);
|
||||
|
||||
reg_8x8_sum_cb_r0_r7 = vqmovn_u16(reg_16x8_sum_cb_r0_r7);
|
||||
reg_8x8_sum_cr_r0_r7 = vqmovn_u16(reg_16x8_sum_cr_r0_r7);
|
||||
|
||||
vst1_u8(pu1_out_pixel, reg_8x8_sum_cb_r0_r7);
|
||||
vst1_u8(pu1_out_pixel + u4_out_stride, reg_8x8_sum_cr_r0_r7);
|
||||
|
||||
pu1_out_pixel += 8;
|
||||
|
||||
pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride << 3)) >> DOWNSCALER_Q;
|
||||
|
||||
/* Update the context for next Loop Count */
|
||||
u4_center_pixel_pos += u4_src_horz_increments;
|
||||
}
|
||||
}
|
||||
|
||||
/* Loop for the remaining height less than 8 */
|
||||
if(u4_rem_vert_loop)
|
||||
{
|
||||
u4_rem_vert_loop_by_4 = u4_rem_vert_loop >> 2;
|
||||
u4_rem_vert_loop = u4_rem_vert_loop % 4;
|
||||
u4_height_finished = (u4_num_iterations_vertical_by_8 << 3);
|
||||
pu1_src_j = pu1_src + ((u4_height_finished) *u4_in_stride);
|
||||
pu1_dst_j = pu1_dst + u4_height_finished;
|
||||
|
||||
u4_center_pixel_pos = u4_center_pixel_pos_src;
|
||||
|
||||
/* 4<= remaining height < 8 */
|
||||
if(u4_rem_vert_loop_by_4)
|
||||
{
|
||||
for(i = 0; i < (WORD32) u4_blk_wd; i++)
|
||||
{
|
||||
u1_phase = get_filter_phase(u4_center_pixel_pos);
|
||||
pi1_filter_grid = pai1_filters[u1_phase];
|
||||
|
||||
u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
|
||||
|
||||
pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
|
||||
|
||||
pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
|
||||
|
||||
reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
|
||||
|
||||
for(j = u4_rem_vert_loop_by_4; j > 0; j--)
|
||||
{
|
||||
/******************************************************/
|
||||
/* This loop is going vertically in bottom direction */
|
||||
/* but the output pixels are stored in horizontal */
|
||||
/* direction in transpose manner */
|
||||
/******************************************************/
|
||||
|
||||
reg_8x16_src_r0 = vld1q_u8(pu1_in_pixel);
|
||||
reg_8x16_src_r1 = vld1q_u8(pu1_in_pixel + u4_in_stride);
|
||||
reg_8x16_src_r2 = vld1q_u8(pu1_in_pixel + 2 * u4_in_stride);
|
||||
reg_8x16_src_r3 = vld1q_u8(pu1_in_pixel + 3 * u4_in_stride);
|
||||
|
||||
reg_8x16x2_src_r0 = vuzpq_u8(reg_8x16_src_r0, reg_8x16_src_r1);
|
||||
reg_8x16x2_src_r1 = vuzpq_u8(reg_8x16_src_r2, reg_8x16_src_r3);
|
||||
|
||||
reg_16x8_src_cb_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[0]));
|
||||
reg_16x8_src_cb_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[0]));
|
||||
reg_16x8_src_cb_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[0]));
|
||||
reg_16x8_src_cb_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[0]));
|
||||
|
||||
reg_16x8_src_cr_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[1]));
|
||||
reg_16x8_src_cr_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[1]));
|
||||
reg_16x8_src_cr_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[1]));
|
||||
reg_16x8_src_cr_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[1]));
|
||||
|
||||
reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
|
||||
|
||||
reg_16x8_mul_cb_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r0),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cb_r1 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r1),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cb_r2 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r2),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cb_r3 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r3),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
|
||||
reg_16x8_mul_cr_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r0),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r1 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r1),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r2 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r2),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r3 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r3),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
|
||||
reg_32x4_sum_cb_r0 = vpaddlq_s16(reg_16x8_mul_cb_r0);
|
||||
reg_32x4_sum_cb_r1 = vpaddlq_s16(reg_16x8_mul_cb_r1);
|
||||
reg_32x4_sum_cb_r2 = vpaddlq_s16(reg_16x8_mul_cb_r2);
|
||||
reg_32x4_sum_cb_r3 = vpaddlq_s16(reg_16x8_mul_cb_r3);
|
||||
|
||||
reg_32x4_sum_cr_r0 = vpaddlq_s16(reg_16x8_mul_cr_r0);
|
||||
reg_32x4_sum_cr_r1 = vpaddlq_s16(reg_16x8_mul_cr_r1);
|
||||
reg_32x4_sum_cr_r2 = vpaddlq_s16(reg_16x8_mul_cr_r2);
|
||||
reg_32x4_sum_cr_r3 = vpaddlq_s16(reg_16x8_mul_cr_r3);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r0, reg_32x4_sum_cb_r1);
|
||||
reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cb_r2, reg_32x4_sum_cb_r3);
|
||||
reg_32x4_sum_cb_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4_sum_cb_r23 =
|
||||
vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r01, reg_32x4_sum_cb_r23);
|
||||
reg_32x4_sum_cb_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r0, reg_32x4_sum_cr_r1);
|
||||
reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cr_r2, reg_32x4_sum_cr_r3);
|
||||
reg_32x4_sum_cr_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
reg_32x4_sum_cr_r23 =
|
||||
vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r01, reg_32x4_sum_cr_r23);
|
||||
reg_32x4_sum_cr_r01 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
|
||||
reg_16x4_sum_cb_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cb_r01, 7);
|
||||
reg_16x4_sum_cr_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cr_r01, 7);
|
||||
|
||||
reg_16x8_sum_cb_cr_r0_r3 =
|
||||
vcombine_u16(reg_16x4_sum_cb_r01_23, reg_16x4_sum_cr_r01_23);
|
||||
reg_8x8_sum_cb_cr_r0_r3 = vmovn_u16(reg_16x8_sum_cb_cr_r0_r3);
|
||||
vst1_lane_u32((uint32_t *) (pu1_out_pixel),
|
||||
vreinterpret_u32_u8(reg_8x8_sum_cb_cr_r0_r3), 0);
|
||||
vst1_lane_u32((uint32_t *) (pu1_out_pixel + u4_out_stride),
|
||||
vreinterpret_u32_u8(reg_8x8_sum_cb_cr_r0_r3), 1);
|
||||
|
||||
pu1_out_pixel += 4;
|
||||
|
||||
pu1_in_pixel +=
|
||||
(u4_src_vert_increments * (u4_in_stride << 2)) >> DOWNSCALER_Q;
|
||||
}
|
||||
/* Update the context for next Loop Count */
|
||||
u4_center_pixel_pos += u4_src_horz_increments;
|
||||
}
|
||||
}
|
||||
|
||||
/* 1<= remaining height < 4 */
|
||||
if(u4_rem_vert_loop)
|
||||
{
|
||||
u4_height_finished =
|
||||
((u4_num_iterations_vertical_by_8 << 3) + (u4_rem_vert_loop_by_4 << 2));
|
||||
pu1_src_j = pu1_src + u4_height_finished * u4_in_stride;
|
||||
pu1_dst_j = pu1_dst + u4_height_finished;
|
||||
|
||||
u4_center_pixel_pos = u4_center_pixel_pos_src;
|
||||
for(i = 0; i < (WORD32) u4_blk_wd; i++)
|
||||
{
|
||||
u1_phase = get_filter_phase(u4_center_pixel_pos);
|
||||
pi1_filter_grid = pai1_filters[u1_phase];
|
||||
|
||||
u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
|
||||
|
||||
pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
|
||||
|
||||
pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
|
||||
|
||||
reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
|
||||
|
||||
for(j = u4_rem_vert_loop; j > 0; j--)
|
||||
{
|
||||
/******************************************************/
|
||||
/* This loop is going vertically in bottom direction */
|
||||
/* but the output pixels are stored in horizontal */
|
||||
/* direction in transpose manner */
|
||||
/******************************************************/
|
||||
|
||||
reg_8x16_src_r0 = vld1q_u8(pu1_in_pixel);
|
||||
|
||||
reg_8x16x2_src_r0 = vuzpq_u8(reg_8x16_src_r0, reg_8x16_src_r0);
|
||||
reg_16x8_src_cb_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[0]));
|
||||
reg_16x8_src_cr_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[1]));
|
||||
|
||||
reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
|
||||
|
||||
reg_16x8_mul_cb_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r0),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
reg_16x8_mul_cr_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r0),
|
||||
reg_16x8_filt_coeff_grid);
|
||||
|
||||
reg_32x4_sum_cb_r0 = vpaddlq_s16(reg_16x8_mul_cb_r0);
|
||||
reg_32x4_sum_cr_r0 = vpaddlq_s16(reg_16x8_mul_cr_r0);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r0, reg_32x4_sum_cr_r0);
|
||||
reg_32x4_sum_cb_cr_r0 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
|
||||
reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_cr_r0, reg_32x4_zero);
|
||||
reg_32x4_sum_cb_cr_r0 =
|
||||
vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
|
||||
|
||||
reg_16x4_sum_cb_cr_r0 = vqrshrun_n_s32(reg_32x4_sum_cb_cr_r0, 7);
|
||||
vst1_lane_u8((pu1_out_pixel), vreinterpret_u8_u16(reg_16x4_sum_cb_cr_r0),
|
||||
0);
|
||||
vst1_lane_u8((pu1_out_pixel + u4_out_stride),
|
||||
vreinterpret_u8_u16(reg_16x4_sum_cb_cr_r0), 2);
|
||||
|
||||
pu1_out_pixel += 1;
|
||||
|
||||
pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride)) >> DOWNSCALER_Q;
|
||||
}
|
||||
|
||||
/* Update the context for next Loop Count */
|
||||
u4_center_pixel_pos += u4_src_horz_increments;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
157
encoder/arm/svc/isvce_function_selector.c
Normal file
157
encoder/arm/svc/isvce_function_selector.c
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_function_selector.c
|
||||
*
|
||||
* @brief
|
||||
* Contains functions to initialize function pointers used in h264
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_init_function_ptr
* - isvce_default_arch
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* System Include Files */
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* User Include Files */
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "ih264_size_defs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "ih264e_error.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
#include "ime_defs.h"
|
||||
#include "ime_structs.h"
|
||||
#include "ih264_error.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
#include "isvc_inter_pred_filters.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
#include "ih264_padding.h"
|
||||
#include "ih264_intra_pred_filters.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "isvce_rate_control.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvce_cabac.h"
|
||||
#include "ih264e_platform_macros.h"
|
||||
#include "isvce_platform_macros.h"
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_function_ptr(void *pv_codec)
|
||||
{
|
||||
isvce_codec_t *ps_codec = (isvce_codec_t *) pv_codec;
|
||||
isvce_init_function_ptr_generic(ps_codec);
|
||||
switch(ps_codec->s_cfg.e_arch)
|
||||
{
|
||||
#if defined(ARMV8)
|
||||
case ARCH_ARM_A53:
|
||||
case ARCH_ARM_A57:
|
||||
case ARCH_ARM_V8_NEON:
|
||||
default:
|
||||
isvce_init_function_ptr_neon_av8(ps_codec);
|
||||
break;
|
||||
#elif !defined(DISABLE_NEON)
|
||||
case ARCH_ARM_A9Q:
|
||||
case ARCH_ARM_A9A:
|
||||
case ARCH_ARM_A9:
|
||||
case ARCH_ARM_A7:
|
||||
case ARCH_ARM_A5:
|
||||
case ARCH_ARM_A15:
|
||||
default:
|
||||
isvce_init_function_ptr_neon_a9q(ps_codec);
|
||||
break;
|
||||
#else
|
||||
default:
|
||||
#endif
|
||||
case ARCH_X86_GENERIC:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Determine the architecture of the encoder executing environment
|
||||
*
|
||||
* @par Description: This routine returns the architecture of the enviro-
|
||||
* ment in which the current encoder is being tested
|
||||
*
|
||||
* @param[in] void
|
||||
*
|
||||
* @returns IV_ARCH_T
|
||||
* architecture
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IV_ARCH_T isvce_default_arch(void)
|
||||
{
|
||||
#if defined(ARMV8)
|
||||
return ARCH_ARM_V8_NEON;
|
||||
#elif !defined(DISABLE_NEON)
|
||||
return ARCH_ARM_A9Q;
|
||||
#else
|
||||
return ARCH_GENERIC;
|
||||
#endif
|
||||
}
|
||||
270
encoder/arm/svc/isvce_function_selector_a9q.c
Normal file
270
encoder/arm/svc/isvce_function_selector_a9q.c
Normal file
|
|
@ -0,0 +1,270 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_function_selector_a9q.c
|
||||
*
|
||||
* @brief
|
||||
* Contains functions to initialize function pointers of codec context
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_init_function_ptr_neon_a9q
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* System Include files */
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* User Include files */
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "ih264_size_defs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "ih264e_error.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
#include "ime_defs.h"
|
||||
#include "ime_structs.h"
|
||||
#include "ih264_error.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
#include "ih264_inter_pred_filters.h"
|
||||
#include "ih264_mem_fns.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
#include "ih264_padding.h"
|
||||
#include "ih264_intra_pred_filters.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "isvce_rate_control.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "ih264e_platform_macros.h"
|
||||
#include "isvce_cabac.h"
|
||||
#include "isvce_core_coding.h"
|
||||
#include "ih264_cavlc_tables.h"
|
||||
#include "isvce_cavlc.h"
|
||||
#include "ih264e_intra_modes_eval.h"
|
||||
#include "ih264e_fmt_conv.h"
|
||||
#include "ih264e_half_pel.h"
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_function_ptr_neon_a9q(isvce_codec_t *ps_codec)
{
    /* Handles to the ISA dependent function tables held inside the codec context */
    isa_dependent_fxns_t *ps_isa_fxns = &ps_codec->s_isa_dependent_fxns;
    enc_loop_fxns_t *ps_loop_fxns = &ps_isa_fxns->s_enc_loop_fxns;
    inter_pred_fxns_t *ps_ip_fxns = &ps_isa_fxns->s_inter_pred_fxns;
    mem_fxns_t *ps_mem = &ps_isa_fxns->s_mem_fxns;
    WORD32 i4_ctxt_idx;

    /*************************************************************************/
    /* Tables reached through the ISA dependent function structures          */
    /*************************************************************************/

    /* Forward transform + quant. Both 8x8 slots take the un-suffixed routine */
    ps_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8;
    ps_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8;
    ps_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4_neon;
    ps_loop_fxns->apf_resi_trans_quant_4x4[1] = isvc_resi_trans_quant_4x4_with_residual_sub_neon;
    ps_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4_neon;
    ps_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] =
        isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;

    /* Inverse quant + inverse transform + recon. All three 8x8 slots take   */
    /* the un-suffixed routine                                               */
    ps_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
    ps_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
    ps_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;

    /* For every 4x4 table below: slot 0 - 'with_res_output' variant,        */
    /* slot 1 - 'with_res_accumulate' variant, slot 2 - plain variant        */
    ps_loop_fxns->apf_iquant_itrans_recon_4x4[0] =
        isvc_iquant_itrans_recon_4x4_with_res_output_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_4x4[1] =
        isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_neon;

    ps_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] =
        isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] =
        isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc_neon;

    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] =
        isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] =
        isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] =
        isvc_iquant_itrans_recon_chroma_4x4_neon;

    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
        isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
        isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
        isvc_iquant_itrans_recon_chroma_4x4_dc_neon;

    /* Inverse hadamard + scaling */
    ps_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9;
    ps_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9;

    /* Luma core coding. NOTE(review): slot 2 is not written by this routine; */
    /* presumably it keeps a value installed elsewhere - confirm             */
    ps_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16;
    ps_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4;
    ps_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16;

    /* Chroma core coding */
    ps_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8;
    ps_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;

    /* Inter prediction leaf level functions */
    ps_ip_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q;
    ps_ip_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q;
    ps_ip_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q;
    ps_ip_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q;
    ps_ip_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q;

    /* Memory handling operations */
    ps_mem->pf_mem_cpy = ih264_memcpy_a9q;
    ps_mem->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q;
    ps_mem->pf_mem_set = ih264_memset_a9q;
    ps_mem->pf_mem_set_mul8 = ih264_memset_mul_8_a9q;

    /*************************************************************************/
    /* Function pointers stored directly in the codec context                */
    /*************************************************************************/

    /* Intra prediction, luma 16x16 */
    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q;
    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q;
    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q;
    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q;

    /* Intra prediction, luma 4x4 */
    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q;
    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q;
    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q;
    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q;

    /* Intra prediction, luma 8x8. NOTE(review): slot 1 is not written by    */
    /* this routine; presumably it keeps a value installed elsewhere -       */
    /* confirm                                                               */
    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q;
    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q;
    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q;

    /* Intra prediction, chroma 8x8 */
    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q;
    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q;
    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q;
    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q;

    /* Intra mode evaluation - encoder level functions */
    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q;
    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q;
    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q;

    /* Deblocking, luma edges */
    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9;
    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9;
    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9;
    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9;

    /* Deblocking, chroma edges */
    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9;
    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9;
    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9;
    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9;

    /* Macroblock syntax writers. Only the five entries below are written;   */
    /* [CABAC][BSLICE] is not touched by this routine                        */
    ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc;
    ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc;
    ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc;
    ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac;
    ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac;

    /* Padding. The bottom pad uses the un-suffixed routine */
    ps_codec->pf_pad_top = ih264_pad_top_a9q;
    ps_codec->pf_pad_bottom = ih264_pad_bottom;
    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q;
    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q;
    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q;
    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q;

    /* SAD functions kept at codec level */
    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q;

    /*************************************************************************/
    /* SAD functions of the ME context embedded in every process context     */
    /*************************************************************************/
    for(i4_ctxt_idx = 0; i4_ctxt_idx < (MAX_PROCESS_CTXT); i4_ctxt_idx++)
    {
        isvce_me_ctxt_t *ps_me = &ps_codec->as_process[i4_ctxt_idx].s_me_ctxt;

        ps_me->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
        ps_me->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
        ps_me->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
        ps_me->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q;
        ps_me->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q;
        ps_me->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q;
        ps_me->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q;
        ps_me->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q;
    }

    /* NOTE(review): unlike isvce_init_function_ptr_neon_av8, this routine   */
    /* does not write the csc and half-pel function pointers - confirm they  */
    /* are installed elsewhere                                               */
}
|
||||
278
encoder/arm/svc/isvce_function_selector_av8.c
Normal file
278
encoder/arm/svc/isvce_function_selector_av8.c
Normal file
|
|
@ -0,0 +1,278 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_function_selector_av8.c
|
||||
*
|
||||
* @brief
|
||||
* Contains functions to initialize function pointers of codec context
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_init_function_ptr_neon_av8
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* System Include files */
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* User Include files */
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "ih264_size_defs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "ih264e_error.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
#include "ime_defs.h"
|
||||
#include "ime_structs.h"
|
||||
#include "ih264_error.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
#include "ih264_inter_pred_filters.h"
|
||||
#include "ih264_mem_fns.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
#include "ih264_padding.h"
|
||||
#include "ih264_intra_pred_filters.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "isvce_rate_control.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "ih264e_platform_macros.h"
|
||||
#include "isvce_cabac.h"
|
||||
#include "isvce_core_coding.h"
|
||||
#include "ih264_cavlc_tables.h"
|
||||
#include "isvce_cavlc.h"
|
||||
#include "ih264e_intra_modes_eval.h"
|
||||
#include "ih264e_fmt_conv.h"
|
||||
#include "ih264e_half_pel.h"
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_function_ptr_neon_av8(isvce_codec_t *ps_codec)
{
    /* Handles to the ISA dependent function tables held inside the codec context */
    isa_dependent_fxns_t *ps_isa_fxns = &ps_codec->s_isa_dependent_fxns;
    enc_loop_fxns_t *ps_loop_fxns = &ps_isa_fxns->s_enc_loop_fxns;
    inter_pred_fxns_t *ps_ip_fxns = &ps_isa_fxns->s_inter_pred_fxns;
    mem_fxns_t *ps_mem = &ps_isa_fxns->s_mem_fxns;
    WORD32 i4_ctxt_idx;

    /*************************************************************************/
    /* Tables reached through the ISA dependent function structures          */
    /*************************************************************************/

    /* Forward transform + quant. Both 8x8 slots take the un-suffixed routine */
    ps_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8;
    ps_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8;
    ps_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4_neon;
    ps_loop_fxns->apf_resi_trans_quant_4x4[1] = isvc_resi_trans_quant_4x4_with_residual_sub_neon;
    ps_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4_neon;
    ps_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] =
        isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;

    /* Inverse quant + inverse transform + recon. All three 8x8 slots take   */
    /* the un-suffixed routine                                               */
    ps_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
    ps_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
    ps_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;

    /* For every 4x4 table below: slot 0 - 'with_res_output' variant,        */
    /* slot 1 - 'with_res_accumulate' variant, slot 2 - plain variant        */
    ps_loop_fxns->apf_iquant_itrans_recon_4x4[0] =
        isvc_iquant_itrans_recon_4x4_with_res_output_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_4x4[1] =
        isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_neon;

    ps_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] =
        isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] =
        isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc_neon;

    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] =
        isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] =
        isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] =
        isvc_iquant_itrans_recon_chroma_4x4_neon;

    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
        isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
        isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
        isvc_iquant_itrans_recon_chroma_4x4_dc_neon;

    /* Inverse hadamard + scaling */
    ps_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_av8;
    ps_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_av8;

    /* Luma core coding. NOTE(review): slot 2 is not written by this routine; */
    /* presumably it keeps a value installed elsewhere - confirm             */
    ps_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16;
    ps_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4;
    ps_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16;

    /* Chroma core coding */
    ps_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8;
    ps_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;

    /* Inter prediction leaf level functions. The bilinear entry takes the   */
    /* un-suffixed routine                                                   */
    ps_ip_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_av8;
    ps_ip_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_av8;
    ps_ip_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_av8;
    ps_ip_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
    ps_ip_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma_av8;

    /* Memory handling operations */
    ps_mem->pf_mem_cpy = ih264_memcpy_av8;
    ps_mem->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_av8;
    ps_mem->pf_mem_set = ih264_memset_av8;
    ps_mem->pf_mem_set_mul8 = ih264_memset_mul_8_av8;

    /*************************************************************************/
    /* Function pointers stored directly in the codec context                */
    /*************************************************************************/

    /* Intra prediction, luma 16x16 */
    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_av8;
    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_av8;
    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_av8;
    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_av8;

    /* Intra prediction, luma 4x4 */
    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_av8;
    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_av8;
    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_av8;
    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_av8;
    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_av8;
    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_av8;
    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_av8;
    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_av8;
    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_av8;

    /* Intra prediction, luma 8x8. NOTE(review): slot 1 is not written by    */
    /* this routine; presumably it keeps a value installed elsewhere -       */
    /* confirm                                                               */
    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_av8;
    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_av8;
    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_av8;
    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_av8;
    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_av8;
    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_av8;
    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_av8;
    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_av8;

    /* Intra prediction, chroma 8x8 */
    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_av8;
    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_av8;
    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_av8;
    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_av8;

    /* Intra mode evaluation - encoder level functions. The 4x4 evaluator    */
    /* takes the un-suffixed routine                                         */
    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_av8;
    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_av8;
    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;

    /* Deblocking, luma edges */
    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_av8;
    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_av8;
    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_av8;
    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_av8;

    /* Deblocking, chroma edges */
    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_av8;
    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_av8;
    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_av8;
    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8;

    /* Macroblock syntax writers. Only the five entries below are written;   */
    /* [CABAC][BSLICE] is not touched by this routine                        */
    ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc;
    ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc;
    ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc;
    ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac;
    ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac;

    /* Padding. The bottom pad uses the un-suffixed routine */
    ps_codec->pf_pad_top = ih264_pad_top_av8;
    ps_codec->pf_pad_bottom = ih264_pad_bottom;
    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_av8;
    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_av8;
    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_av8;
    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_av8;

    /* SAD functions kept at codec level */
    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_av8;

    /* Colour space conversion. Both entries take the un-suffixed routines */
    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;

    /* Half pel generation functions - encoder level */
    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_av8;
    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_av8;

    /*************************************************************************/
    /* SAD functions of the ME context embedded in every process context     */
    /*************************************************************************/
    for(i4_ctxt_idx = 0; i4_ctxt_idx < (MAX_PROCESS_CTXT); i4_ctxt_idx++)
    {
        isvce_me_ctxt_t *ps_me = &ps_codec->as_process[i4_ctxt_idx].s_me_ctxt;

        ps_me->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
        ps_me->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
        ps_me->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_av8;
        ps_me->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_av8;
        ps_me->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_av8;
        ps_me->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_av8;
        ps_me->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_av8;
        ps_me->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_av8;
    }
}
|
||||
139
encoder/arm/svc/isvce_platform_macros.h
Normal file
139
encoder/arm/svc/isvce_platform_macros.h
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_platform_macros.h
|
||||
*
|
||||
* @brief
|
||||
* Contains platform specific routines used for codec context initialization
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_PLATFORM_MACROS_H_
|
||||
#define _ISVCE_PLATFORM_MACROS_H_
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
/* Declaration only. The definition fills the function pointer tables of */
/* ps_codec with a9q / a9 / neon suffixed leaf routines */
void isvce_init_function_ptr_neon_a9q(isvce_codec_t *ps_codec);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
/* Declaration only. The definition (isvce_function_selector_av8.c) fills */
/* the function pointer tables of ps_codec with av8 / neon suffixed leaf */
/* routines */
void isvce_init_function_ptr_neon_av8(isvce_codec_t *ps_codec);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
/* Declaration only. NOTE(review): presumably installs the architecture */
/* independent routines - confirm against the definition */
void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] pv_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
/* Takes the codec context as an untyped pointer, unlike the variants above */
/* which take isvce_codec_t *. NOTE(review): presumably the entry point that */
/* selects among the architecture specific initializers - confirm against */
/* the definition */
void isvce_init_function_ptr(void *pv_codec);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Determine the architecture of the encoder executing environment
|
||||
*
|
||||
* @par Description: This routine returns the architecture of the
|
||||
* environment in which the current encoder is being tested
|
||||
*
|
||||
* @param[in] void
|
||||
*
|
||||
* @returns IV_ARCH_T
|
||||
* architecture
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
/* Takes no arguments and returns an IV_ARCH_T value. NOTE(review): how the */
/* architecture is chosen is not visible from this header - see the */
/* definition */
IV_ARCH_T isvce_default_arch(void);
|
||||
|
||||
#endif
|
||||
625
encoder/arm/svc/isvce_rc_utils_neon.c
Normal file
625
encoder/arm/svc/isvce_rc_utils_neon.c
Normal file
|
|
@ -0,0 +1,625 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file isvce_rc_utils_neon.c
|
||||
*
|
||||
* @brief
|
||||
* This file contains the neon SIMD version of the function which computes
|
||||
* gradient per pixel value being used in Init Qp
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_get_gpp_neon()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_rc_utils_private_defs.h"
|
||||
|
||||
/**
 *******************************************************************************
 *
 * @brief
 *  accumulates the gradients of one 8 byte wide block
 *
 * @par Description:
 *  For each of the u4_num_rows rows starting at pu1_src, adds the absolute
 *  difference between every sample and the sample below it, and between every
 *  sample and its right neighbour, into eight 16 bit lanes
 *
 * @param[in] pu1_src
 *  pointer to the first byte of the block
 *
 * @param[in] i4_stride
 *  offset in bytes between vertically adjacent rows
 *
 * @param[in] u4_num_rows
 *  number of rows to process. The row below the last processed row is read
 *  as well. Callers pass at most 8 rows, i.e. at most 16 values of up to 255
 *  are added per lane, which cannot overflow 16 bits
 *
 * @param[in] u4_is_chroma
 *  0 when horizontally adjacent samples are 1 byte apart (luma); non-zero
 *  when two components are interleaved and adjacent samples of a component
 *  are 2 bytes apart. In the latter case the bytes are de-interleaved so that
 *  lanes 0-3 hold the even bytes (u) and lanes 4-7 hold the odd bytes (v)
 *
 * @param[in] u4_is_last_block
 *  non-zero when the block is the last one of the row. The right neighbours
 *  are then obtained by rotating the loaded row instead of loading beyond it,
 *  so the lanes of the last sample hold wrapped around values and have to be
 *  masked by the caller
 *
 * @returns
 *  per lane sum of absolute differences
 *
 * @remarks
 *  none
 *
 *******************************************************************************
 */
static uint16x8_t isvce_gpp_accum_block_neon(const UWORD8 *pu1_src, WORD32 i4_stride,
                                             UWORD32 u4_num_rows, UWORD32 u4_is_chroma,
                                             UWORD32 u4_is_last_block)
{
    UWORD32 u4_row;

    uint8x8_t reg_8x8_cur_raw, reg_8x8_cur;
    uint8x8_t reg_8x8_below_raw, reg_8x8_below;
    uint8x8_t reg_8x8_right;

    /* Moves even bytes to lanes 0-3 and odd bytes to lanes 4-7 */
    const uint8x8_t reg_8x8_shuffle = {0, 2, 4, 6, 1, 3, 5, 7};
    uint16x8_t reg_16x8_abs_diff = vdupq_n_u16(0);

    reg_8x8_cur_raw = vld1_u8(pu1_src);
    reg_8x8_cur = u4_is_chroma ? vtbl1_u8(reg_8x8_cur_raw, reg_8x8_shuffle) : reg_8x8_cur_raw;

    for(u4_row = 0; u4_row < u4_num_rows; u4_row++)
    {
        reg_8x8_below_raw = vld1_u8(pu1_src + i4_stride);

        if(u4_is_last_block)
        {
            /* vext_u8 needs a compile time constant, hence two calls */
            if(u4_is_chroma)
            {
                reg_8x8_right = vext_u8(reg_8x8_cur_raw, reg_8x8_cur_raw, 2);
            }
            else
            {
                reg_8x8_right = vext_u8(reg_8x8_cur_raw, reg_8x8_cur_raw, 1);
            }
        }
        else
        {
            reg_8x8_right = vld1_u8(pu1_src + (u4_is_chroma ? 2 : 1));
        }

        if(u4_is_chroma)
        {
            /* separating u and v */
            reg_8x8_below = vtbl1_u8(reg_8x8_below_raw, reg_8x8_shuffle);
            reg_8x8_right = vtbl1_u8(reg_8x8_right, reg_8x8_shuffle);
        }
        else
        {
            reg_8x8_below = reg_8x8_below_raw;
        }

        reg_16x8_abs_diff = vabal_u8(reg_16x8_abs_diff, reg_8x8_cur, reg_8x8_below);
        reg_16x8_abs_diff = vabal_u8(reg_16x8_abs_diff, reg_8x8_cur, reg_8x8_right);

        /* The row below becomes the current row of the next iteration */
        reg_8x8_cur_raw = reg_8x8_below_raw;
        reg_8x8_cur = reg_8x8_below;
        pu1_src += i4_stride;
    }

    return reg_16x8_abs_diff;
}

/**
 *******************************************************************************
 *
 * @brief
 *  accumulates the gradients of a plane
 *
 * @par Description:
 *  Adds, for every sample except those of the last row and the last column,
 *  the absolute difference to its right neighbour and the absolute difference
 *  to its bottom neighbour. The plane is walked in bands of up to 8 rows and
 *  in blocks of 8 bytes
 *
 * @param[in] pu1_src
 *  pointer to the first byte of the plane
 *
 * @param[in] i4_stride
 *  offset in bytes between vertically adjacent rows
 *
 * @param[in] u4_width
 *  number of bytes per row; expected to be a multiple of 8
 *
 * @param[in] u4_height
 *  number of rows
 *
 * @param[in] u4_is_chroma
 *  0 for a plane holding a single component; non-zero for a plane holding two
 *  byte interleaved components
 *
 * @returns
 *  four 32 bit partial sums. For an interleaved plane, lanes 0-1 belong to
 *  the even bytes (u) and lanes 2-3 to the odd bytes (v)
 *
 * @remarks
 *  Returns zeros when the plane has no sample with both a right and a bottom
 *  neighbour. All bounds are written as additions, so small dimensions cannot
 *  wrap around the unsigned loop limits
 *
 *******************************************************************************
 */
static uint32x4_t isvce_gpp_accum_plane_neon(const UWORD8 *pu1_src, WORD32 i4_stride,
                                             UWORD32 u4_width, UWORD32 u4_height,
                                             UWORD32 u4_is_chroma)
{
    UWORD32 u4_row, j;
    UWORD32 u4_rows_left, u4_band_rows;

    uint16x8_t reg_16x8_abs_diff;

    /* Lanes holding the last sample of a row are dropped in the last block */
    const uint16x8_t reg_16x8_and_mask_y = {0xffff, 0xffff, 0xffff, 0xffff,
                                            0xffff, 0xffff, 0xffff, 0x0000};
    const uint16x8_t reg_16x8_and_mask_uv = {0xffff, 0xffff, 0xffff, 0x0000,
                                             0xffff, 0xffff, 0xffff, 0x0000};
    const uint16x8_t reg_16x8_and_mask =
        u4_is_chroma ? reg_16x8_and_mask_uv : reg_16x8_and_mask_y;
    uint32x4_t reg_32x4_abs_diff_hadd = vdupq_n_u32(0);

    if((u4_width < 8) || (u4_height < 2))
    {
        return reg_32x4_abs_diff_hadd;
    }

    /* The last row has no bottom neighbour, hence only u4_height - 1 rows are */
    /* processed. Note that input is not required to be padded */
    for(u4_row = 0; u4_row + 1 < u4_height; u4_row += u4_band_rows)
    {
        u4_rows_left = u4_height - 1 - u4_row;
        u4_band_rows = (u4_rows_left < 8) ? u4_rows_left : 8;

        for(j = 0; j + 8 < u4_width; j += 8)
        {
            reg_16x8_abs_diff =
                isvce_gpp_accum_block_neon(pu1_src + j, i4_stride, u4_band_rows, u4_is_chroma, 0);

            reg_32x4_abs_diff_hadd = vpadalq_u16(reg_32x4_abs_diff_hadd, reg_16x8_abs_diff);
        }

        /************************************************************/
        /* Remaining width -                                        */
        /* The last sample of a row has no right neighbour, so the  */
        /* last block is processed separately and the lanes of that */
        /* sample are cleared with the and mask                     */
        /************************************************************/
        ASSERT((u4_width - j) == 8);

        reg_16x8_abs_diff =
            isvce_gpp_accum_block_neon(pu1_src + j, i4_stride, u4_band_rows, u4_is_chroma, 1);
        reg_16x8_abs_diff = vandq_u16(reg_16x8_abs_diff, reg_16x8_and_mask);

        reg_32x4_abs_diff_hadd = vpadalq_u16(reg_32x4_abs_diff_hadd, reg_16x8_abs_diff);

        pu1_src += ((WORD32) u4_band_rows) * i4_stride;
    }

    return reg_32x4_abs_diff_hadd;
}

/**
 *******************************************************************************
 *
 * @brief
 *  get gpp function
 *
 * @par Description:
 *  computes gradient per pixel value for a given frame. For luma and for each
 *  of the two interleaved chroma components, the absolute differences between
 *  a pixel and its immediate right and immediate bottom pixels are summed over
 *  the frame (last row and last column excluded) and divided by the number of
 *  pixels of that component. The three values are then combined using
 *  WT_LUMA_GPP and WT_TOTAL_GPP
 *
 * @param[in] ps_input_buf
 *  pointer to yuv buffer properties. Component 0 is read as luma, component 1
 *  as byte interleaved chroma with u4_width bytes per row and u4_height / 2
 *  rows
 *
 * @returns
 *  calculated gpp value
 *
 * @remarks
 *  u4_width is expected to be a multiple of 8
 *
 *******************************************************************************
 */
DOUBLE isvce_get_gpp_neon(yuv_buf_props_t *ps_input_buf)
{
    UWORD8 *pu1_input_buf;
    /* Signed, following the i4_ prefix of the i4_data_stride field */
    WORD32 i4_input_stride;
    UWORD32 u4_width, u4_height;
    DOUBLE d_gpp_y, d_gpp_u, d_gpp_v, d_gpp;

    uint32x4_t reg_32x4_abs_diff_hadd_y, reg_32x4_abs_diff_hadd_uv;
    uint64x2_t reg_64x2_gpp_y, reg_64x2_gpp_uv;

    u4_width = ps_input_buf->u4_width;
    u4_height = ps_input_buf->u4_height;

    ASSERT((u4_width % 8) == 0);

    /***********************************************************/
    /* For Luma -                                              */
    /* Sum of the absolute difference between the current      */
    /* pixel and its immediate right pixel and the absolute    */
    /* difference between the current pixel and its immediate  */
    /* bottom pixel, accumulated over the frame.               */
    /***********************************************************/
    pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[0].pv_data;
    i4_input_stride = ps_input_buf->as_component_bufs[0].i4_data_stride;

    reg_32x4_abs_diff_hadd_y =
        isvce_gpp_accum_plane_neon(pu1_input_buf, i4_input_stride, u4_width, u4_height, 0);

    /* Pairwise add reg_32x4_abs_diff_hadd_y to get final gpp value */
    reg_64x2_gpp_y = vpaddlq_u32(reg_32x4_abs_diff_hadd_y);
    d_gpp_y = vgetq_lane_u64(reg_64x2_gpp_y, 0);
    d_gpp_y += vgetq_lane_u64(reg_64x2_gpp_y, 1);

    /***************************************************************/
    /* For Chroma -                                                */
    /* Same computation as luma, performed on the de-interleaved   */
    /* Cb and Cr values so that each component gets its own sum.   */
    /***************************************************************/
    pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[1].pv_data;
    i4_input_stride = ps_input_buf->as_component_bufs[1].i4_data_stride;

    reg_32x4_abs_diff_hadd_uv =
        isvce_gpp_accum_plane_neon(pu1_input_buf, i4_input_stride, u4_width, u4_height >> 1, 1);

    /* Pairwise add reg_32x4_abs_diff_hadd_uv to get final gpp_u and gpp_v value */
    reg_64x2_gpp_uv = vpaddlq_u32(reg_32x4_abs_diff_hadd_uv);
    d_gpp_u = vgetq_lane_u64(reg_64x2_gpp_uv, 0);
    d_gpp_v = vgetq_lane_u64(reg_64x2_gpp_uv, 1);

    d_gpp_y /= (u4_width * u4_height);
    d_gpp_u /= ((u4_width / 2) * (u4_height / 2));
    d_gpp_v /= ((u4_width / 2) * (u4_height / 2));

    d_gpp = (DOUBLE) ((WT_LUMA_GPP * d_gpp_y) + d_gpp_u + d_gpp_v) / WT_TOTAL_GPP;

    return d_gpp;
}
|
||||
666
encoder/arm/svc/isvce_residual_pred_neon.c
Normal file
666
encoder/arm/svc/isvce_residual_pred_neon.c
Normal file
|
|
@ -0,0 +1,666 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @file
|
||||
* isvce_residual_pred_neon.c
|
||||
*
|
||||
* @brief
|
||||
* Contains functions
|
||||
* used for SVC residual
|
||||
* prediction
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_macros.h"
|
||||
#include "ih264_size_defs.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "isvc_structs.h"
|
||||
|
||||
/**
 * Narrows four int32x4 vectors (16 values) to int16 by truncation and stores
 * them as one 16-sample output row.
 */
static void isvce_store_row16_neon(WORD16 *pi2_dst, const int32x4_t *pi4_quads)
{
    vst1q_s16(pi2_dst, vcombine_s16(vmovn_s32(pi4_quads[0]), vmovn_s32(pi4_quads[1])));
    vst1q_s16(pi2_dst + 8, vcombine_s16(vmovn_s32(pi4_quads[2]), vmovn_s32(pi4_quads[3])));
}

/**
 * Upsamples one 8x8 block of residuals to 16x16.
 *
 * Horizontal pass: every pair of neighbours (a, b) yields 3a + b and a + 3b;
 * the first and last sample of each row are replicated as 4 * sample. The
 * 8 rows of 16 values are kept as 32-bit words in pi4_ref_array.
 * Vertical pass: the same 3:1 weighting between consecutive intermediate
 * rows followed by a rounding shift by 4; the first and last output rows are
 * the first and last intermediate rows with a rounding shift by 2.
 *
 * @param[in]  pi2_inp_data        top-left sample of the 8x8 block; sample
 *                                 [8] of every row is also read
 * @param[in]  i4_inp_data_stride  input stride in samples
 * @param[out] pi2_out_res         16x16 destination
 * @param[in]  i4_out_res_stride   destination stride in samples
 * @param[out] pi4_ref_array       intermediate storage, 8 rows of MB_SIZE words
 */
static void isvce_luma_residual_upsample_tx8x8_neon(const WORD16 *pi2_inp_data,
                                                    WORD32 i4_inp_data_stride,
                                                    WORD16 *pi2_out_res,
                                                    WORD32 i4_out_res_stride,
                                                    WORD32 *pi4_ref_array)
{
    int32x4_t ai4_prev[4], ai4_out_a[4], ai4_out_b[4];
    WORD32 *pi4_row = pi4_ref_array;
    WORD32 i4_i, i4_k;

    /* ----------- Horizontal Interpolation ---------------- */
    for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++)
    {
        int16x8_t i2_curr = vld1q_s16(pi2_inp_data);
        int16x8_t i2_next = vld1q_s16(pi2_inp_data + 1);
        int16x8_t i2_sum = vaddq_s16(i2_curr, i2_next);
        /* 3 * curr + next and curr + 3 * next */
        int16x8_t i2_near_curr = vaddq_s16(vshlq_n_s16(i2_curr, 1), i2_sum);
        int16x8_t i2_near_next = vaddq_s16(vshlq_n_s16(i2_next, 1), i2_sum);
        int16x8x2_t i2_zipped = vzipq_s16(i2_near_curr, i2_near_next);

        vst1q_s32(pi4_row + 1, vmovl_s16(vget_low_s16(i2_zipped.val[0])));
        vst1q_s32(pi4_row + 5, vmovl_s16(vget_high_s16(i2_zipped.val[0])));
        vst1q_s32(pi4_row + 9, vmovl_s16(vget_low_s16(i2_zipped.val[1])));
        /* Only positions 13 and 14 are needed from the last quad; storing */
        /* two lanes keeps the write inside the 16-wide row                */
        vst1_s32(pi4_row + 13, vget_low_s32(vmovl_s16(vget_high_s16(i2_zipped.val[1]))));

        /* Row edges: replicate the border sample with the same gain of 4. */
        /* Multiplication is used as the residual may be negative          */
        pi4_row[0] = pi2_inp_data[0] * 4;
        pi4_row[15] = pi2_inp_data[7] * 4;

        pi4_row += MB_SIZE;
        pi2_inp_data += i4_inp_data_stride;
    }

    /* ----------- Vertical Interpolation ---------------- */
    pi4_row = pi4_ref_array;

    /* First output row depends on the first intermediate row only */
    for(i4_k = 0; i4_k < 4; i4_k++)
    {
        ai4_prev[i4_k] = vld1q_s32(pi4_row + 4 * i4_k);
        ai4_out_a[i4_k] = vrshrq_n_s32(ai4_prev[i4_k], 2);
    }
    isvce_store_row16_neon(pi2_out_res, ai4_out_a);
    pi2_out_res += i4_out_res_stride;

    /* Each pair of consecutive intermediate rows produces 2 output rows */
    for(i4_i = 1; i4_i < BLK8x8SIZE; i4_i++)
    {
        pi4_row += MB_SIZE;

        for(i4_k = 0; i4_k < 4; i4_k++)
        {
            int32x4_t i4_curr = vld1q_s32(pi4_row + 4 * i4_k);
            int32x4_t i4_sum = vaddq_s32(ai4_prev[i4_k], i4_curr);

            ai4_out_a[i4_k] =
                vrshrq_n_s32(vaddq_s32(vshlq_n_s32(ai4_prev[i4_k], 1), i4_sum), 4);
            ai4_out_b[i4_k] = vrshrq_n_s32(vaddq_s32(vshlq_n_s32(i4_curr, 1), i4_sum), 4);

            /* current row becomes the previous row of the next iteration */
            ai4_prev[i4_k] = i4_curr;
        }

        isvce_store_row16_neon(pi2_out_res, ai4_out_a);
        pi2_out_res += i4_out_res_stride;

        isvce_store_row16_neon(pi2_out_res, ai4_out_b);
        pi2_out_res += i4_out_res_stride;
    }

    /* Last output row depends on the last intermediate row only */
    for(i4_k = 0; i4_k < 4; i4_k++)
    {
        ai4_out_a[i4_k] = vrshrq_n_s32(ai4_prev[i4_k], 2);
    }
    isvce_store_row16_neon(pi2_out_res, ai4_out_a);
}

/**
 * Upsamples one 4x4 block of residuals to 8x8 using the same 3:1 weighting
 * and edge replication as the 8x8 variant. The vertical pass narrows with
 * saturation.
 *
 * @param[in]  pi2_inp_data         top-left sample of the 4x4 block; 8
 *                                  samples are loaded from every row
 * @param[in]  i4_inp_data_stride   input stride in samples
 * @param[out] pi2_out_res          8x8 destination
 * @param[in]  i4_out_res_stride    destination stride in samples
 * @param[out] pi2_refarray_buffer  intermediate storage, 4 rows of 8 samples
 */
static void isvce_luma_residual_upsample_blk4x4_neon(const WORD16 *pi2_inp_data,
                                                     WORD32 i4_inp_data_stride,
                                                     WORD16 *pi2_out_res,
                                                     WORD32 i4_out_res_stride,
                                                     WORD16 *pi2_refarray_buffer)
{
    WORD32 i4_i, i4_half;

    /* ----------- Horizontal Interpolation ---------------- */
    for(i4_i = 0; i4_i < 4; i4_i++)
    {
        int16x8_t i2_curr = vld1q_s16(pi2_inp_data + i4_i * i4_inp_data_stride);
        int16x8_t i2_next = vextq_s16(i2_curr, i2_curr, 1);
        int16x8_t i2_sum = vaddq_s16(i2_next, i2_curr);
        int16x8_t i2_near_curr = vaddq_s16(vshlq_n_s16(i2_curr, 1), i2_sum);
        int16x8_t i2_near_next = vaddq_s16(vshlq_n_s16(i2_next, 1), i2_sum);
        int16x8x2_t i2_zipped = vzipq_s16(i2_near_curr, i2_near_next);
        /* 4 * sample, used for the replicated row edges */
        int16x8_t i2_edges = vshlq_n_s16(i2_curr, 2);
        int16x8_t i2_row;

        /* Lanes 1..6 take the six interpolated samples; lanes 0 and 7 are */
        /* replaced by the scaled border samples. Assembling the row in a  */
        /* register lets a single store write exactly 8 samples            */
        i2_row = vextq_s16(i2_edges, i2_zipped.val[0], 7);
        i2_row = vsetq_lane_s16(vgetq_lane_s16(i2_edges, 0), i2_row, 0);
        i2_row = vsetq_lane_s16(vgetq_lane_s16(i2_edges, 3), i2_row, 7);

        vst1q_s16(pi2_refarray_buffer + i4_i * 8, i2_row);
    }

    /* ----------- Vertical Interpolation ---------------- */
    /* The 8 output columns are processed as two halves of 4 columns */
    for(i4_half = 0; i4_half < 2; i4_half++)
    {
        int16x4_t ai2_samp[4];
        WORD16 *pi2_out = pi2_out_res + 4 * i4_half;

        for(i4_i = 0; i4_i < 4; i4_i++)
        {
            ai2_samp[i4_i] = vld1_s16(pi2_refarray_buffer + i4_i * 8 + 4 * i4_half);
        }

        /* First output row depends on the first intermediate row only */
        vst1_s16(pi2_out, vrshr_n_s16(ai2_samp[0], 2));
        pi2_out += i4_out_res_stride;

        for(i4_i = 0; i4_i < 3; i4_i++)
        {
            int32x4_t i4_sum = vaddl_s16(ai2_samp[i4_i], ai2_samp[i4_i + 1]);
            int32x4_t i4_near_upper = vaddq_s32(vshll_n_s16(ai2_samp[i4_i], 1), i4_sum);
            int32x4_t i4_near_lower = vaddq_s32(vshll_n_s16(ai2_samp[i4_i + 1], 1), i4_sum);

            vst1_s16(pi2_out, vqrshrn_n_s32(i4_near_upper, 4));
            pi2_out += i4_out_res_stride;

            vst1_s16(pi2_out, vqrshrn_n_s32(i4_near_lower, 4));
            pi2_out += i4_out_res_stride;
        }

        /* Last output row depends on the last intermediate row only */
        vst1_s16(pi2_out, vrshr_n_s16(ai2_samp[3], 2));
    }
}

/**
 * 2x (dyadic) upsampling of luma residuals, NEON implementation.
 *
 * When u1_ref_tx_size is non-zero and u4_ref_nnz is non-zero, the 8x8 input
 * block is upsampled to 16x16 in one go. Otherwise the input is handled as
 * four 4x4 blocks, each upsampled to 8x8, and a block is processed only if
 * its bit in u4_ref_nnz is set (bits 0, 1, 4 and 5 are consulted, in that
 * order); the output of a block whose bit is clear is left untouched.
 *
 * @param[in]  ps_ref_array_positions  unused
 * @param[in]  ps_ref_array_phases     unused
 * @param[in]  ps_inp      WORD16 residual input; processing starts one row
 *                         and one column inside the buffer
 * @param[out] ps_out      WORD16 upsampled residual output
 * @param[in]  ps_scratch  intermediate storage for the horizontal pass
 * @param[in]  u4_ref_nnz  coded-block flags, see above
 * @param[in]  u1_ref_tx_size  non-zero selects the whole-8x8 path
 */
void isvce_luma_residual_sampler_2x_neon(coordinates_t *ps_ref_array_positions,
                                         coordinates_t *ps_ref_array_phases,
                                         buffer_container_t *ps_inp, buffer_container_t *ps_out,
                                         buffer_container_t *ps_scratch, UWORD32 u4_ref_nnz,
                                         UWORD8 u1_ref_tx_size)
{
    WORD16 *pi2_inp_data = (WORD16 *) ps_inp->pv_data;
    WORD16 *pi2_out_res = (WORD16 *) ps_out->pv_data;
    WORD32 i4_inp_data_stride = ps_inp->i4_data_stride;
    WORD32 i4_out_res_stride = ps_out->i4_data_stride;
    WORD16 *pi2_refarray_buffer = (WORD16 *) ps_scratch->pv_data;
    WORD32 i4_blk_ctr;

    UNUSED(ps_ref_array_positions);
    UNUSED(ps_ref_array_phases);

    /* For 2x scaling, offsets always point to TL pixel outside MB */
    /* Hence, refTransBlkIdc will be different and since phase     */
    /* for first refArray pos for horiz filtering samples > 8,     */
    /* first row and first column from the refArray is never used  */
    pi2_inp_data += 1 + i4_inp_data_stride;

    if((u1_ref_tx_size) && (0 != u4_ref_nnz))
    {
        isvce_luma_residual_upsample_tx8x8_neon(pi2_inp_data, i4_inp_data_stride, pi2_out_res,
                                                i4_out_res_stride,
                                                (WORD32 *) pi2_refarray_buffer);
    }
    else
    {
        /* ----------------------------------------------------------------- */
        /* LOOP over number of blocks                                        */
        /* ----------------------------------------------------------------- */
        for(i4_blk_ctr = 0; i4_blk_ctr < 4; i4_blk_ctr++)
        {
            /* if reference layer is not coded then no processing */
            if(0 != (u4_ref_nnz & 0x1))
            {
                isvce_luma_residual_upsample_blk4x4_neon(pi2_inp_data, i4_inp_data_stride,
                                                         pi2_out_res, i4_out_res_stride,
                                                         pi2_refarray_buffer);
            }

            pi2_out_res += BLK8x8SIZE;

            /* Block level loop updates */
            if(1 == i4_blk_ctr)
            {
                /* move to the first block of the second block row */
                pi2_inp_data -= SUB_BLK_WIDTH_4x4;
                pi2_inp_data += (i4_inp_data_stride * SUB_BLK_HEIGHT_4x4);
                pi2_out_res -= MB_SIZE;
                pi2_out_res += (i4_out_res_stride * BLK8x8SIZE);
                u4_ref_nnz >>= 2;
            }
            else
            {
                pi2_inp_data += SUB_BLK_HEIGHT_4x4;
            }

            u4_ref_nnz >>= 1;
        }
        /* The above loop iterates over all the blocks */
    }
}
|
||||
|
||||
/**
 * Computes the sum of absolute values of (source - prediction - residual
 * prediction) over a block of u4_mb_wd x u4_mb_ht samples.
 *
 * A NEON path is taken when the width is 16 and the height is a multiple of
 * 8; every other size is handled by a scalar loop.
 *
 * @param[in] ps_src    8-bit source samples
 * @param[in] ps_pred   8-bit prediction samples
 * @param[in] ps_res    16-bit residual prediction samples
 * @param[in] u4_mb_wd  block width in samples
 * @param[in] u4_mb_ht  block height in samples
 *
 * @returns the accumulated SAD
 */
UWORD32 isvce_get_sad_with_residual_pred_neon(buffer_container_t *ps_src,
                                              buffer_container_t *ps_pred,
                                              buffer_container_t *ps_res, UWORD32 u4_mb_wd,
                                              UWORD32 u4_mb_ht)
{
    UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_res_stride = ps_res->i4_data_stride;
    UWORD32 u4_sad = 0;
    UWORD32 i, j;

    if((16 == u4_mb_wd) && (0 == (u4_mb_ht % 8)))
    {
        /* Rows are consumed in groups of four; each group is accumulated */
        /* in 16-bit lanes and then reduced into u4_sad                    */
        UWORD32 u4_num_row_quads = u4_mb_ht / 4;

        for(i = 0; i < u4_num_row_quads; i++)
        {
            int16x8_t i2_abs_acc = vdupq_n_s16(0);
            int32x4_t i4_pair_sums;
            int32x2_t i4_half_sums;

            for(j = 0; j < 4; j++)
            {
                uint8x16_t u1_src_row = vld1q_u8(pu1_src);
                uint8x16_t u1_pred_row = vld1q_u8(pu1_pred);

                /* src - pred, widened to 16 bits and reinterpreted as signed */
                int16x8_t i2_diff_lo = vreinterpretq_s16_u16(
                    vsubl_u8(vget_low_u8(u1_src_row), vget_low_u8(u1_pred_row)));
                int16x8_t i2_diff_hi = vreinterpretq_s16_u16(
                    vsubl_u8(vget_high_u8(u1_src_row), vget_high_u8(u1_pred_row)));

                /* remove the residual prediction */
                i2_diff_lo = vsubq_s16(i2_diff_lo, vld1q_s16(pi2_res));
                i2_diff_hi = vsubq_s16(i2_diff_hi, vld1q_s16(pi2_res + 8));

                i2_abs_acc = vaddq_s16(i2_abs_acc, vabsq_s16(i2_diff_lo));
                i2_abs_acc = vaddq_s16(i2_abs_acc, vabsq_s16(i2_diff_hi));

                pu1_src += i4_src_stride;
                pu1_pred += i4_pred_stride;
                pi2_res += i4_res_stride;
            }

            /* horizontal reduction of the eight 16-bit lanes */
            i4_pair_sums = vpaddlq_s16(i2_abs_acc);
            i4_half_sums = vadd_s32(vget_low_s32(i4_pair_sums), vget_high_s32(i4_pair_sums));

            u4_sad += vget_lane_s32(i4_half_sums, 0);
            u4_sad += vget_lane_s32(i4_half_sums, 1);
        }
    }
    else
    {
        /* Generic scalar path for all other block sizes */
        for(i = 0; i < u4_mb_ht; i++)
        {
            UWORD32 u4_src_row_offset = i * i4_src_stride;
            UWORD32 u4_pred_row_offset = i * i4_pred_stride;
            UWORD32 u4_res_row_offset = i * i4_res_stride;

            for(j = 0; j < u4_mb_wd; j++)
            {
                WORD32 i4_diff = pu1_src[j + u4_src_row_offset] -
                                 pu1_pred[j + u4_pred_row_offset] -
                                 pi2_res[j + u4_res_row_offset];

                u4_sad += ABS(i4_diff);
            }
        }
    }

    return u4_sad;
}
|
||||
|
|
@ -16,7 +16,7 @@
|
|||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
*/
|
||||
|
||||
#ifndef _RATE_CONTROL_API_STRUCTS_H_
|
||||
#define _RATE_CONTROL_API_STRUCTS_H_
|
||||
|
|
@ -74,7 +74,9 @@ typedef struct rate_control_api_t
|
|||
|
||||
UWORD8 u1_is_first_frm;
|
||||
|
||||
UWORD8 au1_min_max_qp[(MAX_PIC_TYPE << 1)];
|
||||
UWORD8 au1_min_max_qp[MAX_PIC_TYPE * 2];
|
||||
|
||||
UWORD8 au1_min_max_avc_qp[MAX_PIC_TYPE * 2];
|
||||
|
||||
WORD32 i4_prev_frm_est_bits;
|
||||
|
||||
|
|
@ -89,5 +91,4 @@ typedef struct rate_control_api_t
|
|||
|
||||
} rate_control_api_t;
|
||||
|
||||
#endif/*_RATE_CONTROL_API_STRUCTS_H_*/
|
||||
|
||||
#endif /*_RATE_CONTROL_API_STRUCTS_H_*/
|
||||
|
|
|
|||
80
encoder/riscv/svc/isvce_function_selector.c
Normal file
80
encoder/riscv/svc/isvce_function_selector.c
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_function_selector.c
|
||||
*
|
||||
* @brief
|
||||
* Contains functions to initialize function pointers used in svc
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "iv2.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_function_ptr(isvce_codec_t *ps_codec)
{
    /* This selector has no architecture-specific kernels to install; it */
    /* simply delegates to the generic initializer.                      */
    isvce_init_function_ptr_generic(ps_codec);
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Determine the architecture of the encoder executing environment
|
||||
*
|
||||
* @par Description: This routine returns the architecture of the enviro-
|
||||
* ment in which the current encoder is being tested
|
||||
*
|
||||
* @param[in] void
|
||||
*
|
||||
* @returns IV_ARCH_T
|
||||
* architecture
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IV_ARCH_T isvce_default_arch(void)
{
    /* This implementation always reports ARCH_NA. */
    return ARCH_NA;
}
|
||||
103
encoder/riscv/svc/isvce_platform_macros.h
Normal file
103
encoder/riscv/svc/isvce_platform_macros.h
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_platform_macros.h
|
||||
*
|
||||
* @brief
|
||||
* Contains platform specific routines used for codec context initialization
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_PLATFORM_MACROS_H_
|
||||
#define _ISVCE_PLATFORM_MACROS_H_
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Extern Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_function_ptr(isvce_codec_t *ps_codec);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Determine the architecture of the encoder executing environment
|
||||
*
|
||||
* @par Description: This routine returns the architecture of the enviro-
|
||||
* ment in which the current encoder is being tested
|
||||
*
|
||||
* @param[in] void
|
||||
*
|
||||
* @returns IV_ARCH_T
|
||||
* architecture
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IV_ARCH_T isvce_default_arch(void);
|
||||
|
||||
#endif
|
||||
116
encoder/svc/irc_svc_rate_control_api.c
Normal file
116
encoder/svc/irc_svc_rate_control_api.c
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* System include files */
|
||||
#include "stdio.h"
|
||||
|
||||
/* User include files */
|
||||
#include "irc_datatypes.h"
|
||||
#include "irc_common.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_mem_req_and_acq.h"
|
||||
#include "irc_rd_model.h"
|
||||
#include "irc_est_sad.h"
|
||||
#include "irc_fixed_point_error_bits.h"
|
||||
#include "irc_vbr_storage_vbv.h"
|
||||
#include "irc_picture_type.h"
|
||||
#include "irc_bit_allocation.h"
|
||||
#include "irc_mb_model_based.h"
|
||||
#include "irc_cbr_buffer_control.h"
|
||||
#include "irc_vbr_str_prms.h"
|
||||
#include "irc_rate_control_api.h"
|
||||
#include "irc_rate_control_api_structs.h"
|
||||
#include "irc_trace_support.h"
|
||||
|
||||
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))

#define DEV_Q 4        /* Q format (shift) for the deviation range factors */
#define HI_DEV_FCTR 22 /* 1.4 * 16 */
#define LO_DEV_FCTR 12 /* 0.75 * 16 */

/* Scale Qprev by HI_DEV_FCTR / LO_DEV_FCTR in Q4 with round-to-nearest.      */
/* The argument is parenthesized so that the WORD32 cast applies to the whole */
/* argument expression and not only to its first operand.                     */
#define GET_HI_DEV_QP(Qprev) ((((WORD32) (Qprev)) * HI_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
#define GET_LO_DEV_QP(Qprev) ((((WORD32) (Qprev)) * LO_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)

/* Clamp Qc to the closed interval [lo_d, hi_d]; the lower bound is tested first */
#define CLIP_QP(Qc, hi_d, lo_d) (((Qc) < (lo_d)) ? ((lo_d)) : (((Qc) > (hi_d)) ? (hi_d) : (Qc)))
|
||||
|
||||
/*******************************************************************************
|
||||
* Description : Gets the frame level qp for the given picture type
|
||||
* based on bits per pixel and gradient per pixel
|
||||
******************************************************************************/
|
||||
/* Get frame level QP based on BPP and GPP */
|
||||
UWORD8 irc_get_frame_level_init_qp(rate_control_handle *ps_rate_control_api, rc_type_e e_rc_type,
|
||||
picture_type_e e_pic_type, DOUBLE d_bpp, DOUBLE d_gpp)
|
||||
{
|
||||
DOUBLE d_frame_qp;
|
||||
|
||||
UWORD8 u1_min_qp =
|
||||
((rate_control_api_t *) (ps_rate_control_api))->au1_min_max_avc_qp[(e_pic_type << 1)];
|
||||
UWORD8 u1_max_qp =
|
||||
((rate_control_api_t *) (ps_rate_control_api))->au1_min_max_avc_qp[(e_pic_type << 1) + 1];
|
||||
|
||||
if((e_rc_type != VBR_STORAGE) && (e_rc_type != VBR_STORAGE_DVD_COMP) &&
|
||||
(e_rc_type != CBR_NLDRC) && (e_rc_type != CONST_QP) && (e_rc_type != VBR_STREAMING))
|
||||
{
|
||||
trace_printf(
|
||||
(const WORD8 *) (const WORD8 *) " Only VBR,NLDRC and CONST QP supported for now \n");
|
||||
return (0);
|
||||
}
|
||||
|
||||
if(d_bpp <= 0.18)
|
||||
{
|
||||
d_frame_qp = 43.49 + (0.59 * d_gpp) - (106.45 * d_bpp);
|
||||
}
|
||||
else if(d_bpp <= 0.6)
|
||||
{
|
||||
d_frame_qp = 25.12 + (0.69 * d_gpp) - (29.23 * (d_bpp - 0.18));
|
||||
}
|
||||
else
|
||||
{
|
||||
d_frame_qp = 13.93 + (0.74 * d_gpp) - (18.4 * (d_bpp - 0.6));
|
||||
}
|
||||
|
||||
/* Truncating the QP to the Max and Min Qp values possible */
|
||||
if(d_frame_qp < u1_min_qp) d_frame_qp = u1_min_qp;
|
||||
if(d_frame_qp > u1_max_qp) d_frame_qp = u1_max_qp;
|
||||
|
||||
return ((UWORD8) (d_frame_qp + 0.5));
|
||||
}
|
||||
|
||||
void irc_change_qp_constraints(rate_control_api_t *ps_rate_control_api, UWORD8 *pu1_min_max_qp,
                               UWORD8 *pu1_min_max_avc_qp)
{
    /* Both tables hold a {min, max} pair per picture type, i.e. */
    /* (MAX_PIC_TYPE << 1) entries each; copy them entry by entry */
    WORD32 i4_entry;

    for(i4_entry = 0; i4_entry < (MAX_PIC_TYPE << 1); i4_entry++)
    {
        ps_rate_control_api->au1_min_max_qp[i4_entry] = pu1_min_max_qp[i4_entry];
        ps_rate_control_api->au1_min_max_avc_qp[i4_entry] = pu1_min_max_avc_qp[i4_entry];
    }
}
|
||||
|
||||
UWORD8 irc_is_scenecut(rate_control_api_t *ps_rate_control_api)
{
    /* Report the scene-cut flag currently stored in the rate control state */
    UWORD8 u1_scd_flag = ps_rate_control_api->u1_scd_detected;

    return u1_scd_flag;
}
|
||||
46
encoder/svc/irc_svc_rate_control_api.h
Normal file
46
encoder/svc/irc_svc_rate_control_api.h
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
#ifndef _IRC_SVC_RATE_CONTROL_API_H_
|
||||
#define _IRC_SVC_RATE_CONTROL_API_H_
|
||||
|
||||
/* Dependencies of 'irc_rate_control_api_structs' */
|
||||
#include "irc_picture_type.h"
|
||||
#include "irc_rd_model.h"
|
||||
#include "irc_vbr_storage_vbv.h"
|
||||
#include "irc_est_sad.h"
|
||||
#include "irc_bit_allocation.h"
|
||||
#include "irc_mb_model_based.h"
|
||||
#include "irc_cbr_buffer_control.h"
|
||||
#include "irc_vbr_str_prms.h"
|
||||
#include "irc_common.h"
|
||||
|
||||
#include "irc_rate_control_api_structs.h"
|
||||
|
||||
/* Get frame level QP based on BPP and GPP */
|
||||
UWORD8 irc_get_frame_level_init_qp(rate_control_api_t *ps_rate_control_api, rc_type_e e_rc_type,
|
||||
picture_type_e e_pic_type, DOUBLE d_bpp, DOUBLE d_gpp);
|
||||
|
||||
void irc_change_qp_constraints(rate_control_api_t *ps_rate_control_api, UWORD8 *pu1_min_max_qp,
|
||||
UWORD8 *pu1_min_max_avc_qp);
|
||||
|
||||
extern UWORD8 irc_is_scenecut(rate_control_api_t *ps_rate_control_api);
|
||||
|
||||
#endif
|
||||
1023
encoder/svc/isvce.h
Normal file
1023
encoder/svc/isvce.h
Normal file
File diff suppressed because it is too large
Load diff
6054
encoder/svc/isvce_api.c
Normal file
6054
encoder/svc/isvce_api.c
Normal file
File diff suppressed because it is too large
Load diff
753
encoder/svc/isvce_cabac.c
Normal file
753
encoder/svc/isvce_cabac.c
Normal file
|
|
@ -0,0 +1,753 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_cabac.c
|
||||
*
|
||||
* @brief
|
||||
* Contains all leaf level functions for CABAC entropy coding.
|
||||
*
|
||||
*
|
||||
* @author
|
||||
* Doney Alex
|
||||
*
|
||||
* @par List of Functions:
|
||||
*
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* System include files */
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
/* User include files */
|
||||
#include "ih264e_config.h"
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "ih264_macros.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "ih264e_error.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
#include "ime_defs.h"
|
||||
#include "ime_structs.h"
|
||||
#include "ih264_error.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
#include "isvc_inter_pred_filters.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
#include "ih264_padding.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "ih264_intra_pred_filters.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "isvce_rate_control.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvce_cabac.h"
|
||||
#include "isvce_encode_header.h"
|
||||
#include "ih264_cavlc_tables.h"
|
||||
#include "ih264e_statistics.h"
|
||||
#include "ih264e_trace.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Definitions */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* k-th order Exp-Golomb (UEGk) binarization process: Implements concatenated
|
||||
* unary/ k-th order Exp-Golomb (UEGk) binarization process,
|
||||
* where k = 0 as defined in 9.3.2.3 of ITU_T_H264-201402
|
||||
*
|
||||
* @param[in] i2_sufs
|
||||
* Suffix bit string
|
||||
*
|
||||
* @param[in] pi1_bins_len
|
||||
* Pointer to length of the string
|
||||
*
|
||||
* @returns Binarized value
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
UWORD32 isvce_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len)
{
    /* with k = 0 the value that gets coded is (suffix + 1) */
    const UWORD32 u4_code_num = i2_sufs + 1;

    /* 32 - CLZ(value), plus one more when the value itself is zero */
    const WORD32 i4_unary_len = (32 - CLZ(u4_code_num) + (0 == u4_code_num));

    /* number of bins that follow the unary part */
    const WORD32 i4_info_len = i4_unary_len - 1;

    /* (i4_unary_len - 1) '1' bins followed by a terminating '0' bin */
    const UWORD32 u4_unary_part = (1 << i4_unary_len) - 2;

    /* low i4_info_len bits of the coded value */
    const UWORD32 u4_info_part = u4_code_num & ((1 << i4_info_len) - 1);

    /* total length = 2 * (i4_unary_len - 1) + 1 + k, with k = 0 */
    *pi1_bins_len = (2 * i4_unary_len) - 1;

    return ((u4_unary_part << i4_info_len) | u4_info_part);
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Get cabac context for the MB :calculates the pointers to Top and left
|
||||
* cabac neighbor context depending upon neighbor availability.
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* Pointer to entropy context structure
|
||||
*
|
||||
* @param[in] u4_mb_type
|
||||
* Type of MB
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_get_cabac_context(isvce_entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type)
{
    /* CABAC context */
    isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
    isvce_mb_info_ctxt_t *ps_ctx_inc_mb_map;
    cab_csbp_t *ps_lft_csbp;

    WORD32 i4_lft_avail, i4_top_avail, i4_is_intra;
    WORD32 i4_mb_x, i4_mb_y;
    UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx;

    i4_is_intra = ((u4_mb_type == I16x16) || (u4_mb_type == I8x8) || (u4_mb_type == I4x4));

    /* derive neighbor availability */
    i4_mb_x = ps_ent_ctxt->i4_mb_x;
    i4_mb_y = ps_ent_ctxt->i4_mb_y;

    /* move to the slice index entries of the current MB row */
    pu1_slice_idx += (i4_mb_y * ps_ent_ctxt->i4_wd_mbs);

    /* left MB is available when it exists and has the same slice index */
    i4_lft_avail = (i4_mb_x == 0 || (pu1_slice_idx[i4_mb_x - 1] != pu1_slice_idx[i4_mb_x])) ? 0 : 1;

    /* top MB is available when it exists and has the same slice index */
    i4_top_avail = (i4_mb_y == 0 ||
                    (pu1_slice_idx[i4_mb_x - ps_ent_ctxt->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))
                       ? 0
                       : 1;

    /* start with both neighbors pointing at the default context entry */
    ps_ctx_inc_mb_map = ps_cabac_ctxt->ps_mb_map_ctxt_inc;
    ps_cabac_ctxt->ps_curr_ctxt_mb_info = ps_ctx_inc_mb_map + i4_mb_x;
    ps_cabac_ctxt->ps_left_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info;
    ps_cabac_ctxt->ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info;

    /* left neighbor coded sub-block pattern, ref idx and mv context storage */
    ps_lft_csbp = ps_cabac_ctxt->ps_lft_csbp;
    ps_cabac_ctxt->pu1_left_y_ac_csbp = &ps_lft_csbp->u1_y_ac_csbp_top_mb;
    ps_cabac_ctxt->pu1_left_uv_ac_csbp = &ps_lft_csbp->u1_uv_ac_csbp_top_mb;
    ps_cabac_ctxt->pu1_left_yuv_dc_csbp = &ps_lft_csbp->u1_yuv_dc_csbp_top_mb;
    ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc = &ps_cabac_ctxt->i1_left_ref_idx_ctx_inc_arr[0][0];
    ps_cabac_ctxt->pu1_left_mv_ctxt_inc = ps_cabac_ctxt->u1_left_mv_ctxt_inc_arr[0];

    if(i4_lft_avail)
    {
        ps_cabac_ctxt->ps_left_ctxt_mb_info = ps_cabac_ctxt->ps_curr_ctxt_mb_info - 1;
    }

    /* NOTE(review): the top neighbor is read from the current column entry; */
    /* presumably that entry still holds the info of the MB above - confirm  */
    if(i4_top_avail)
    {
        ps_cabac_ctxt->ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_curr_ctxt_mb_info;
    }

    if(!i4_lft_avail)
    {
        /* left neighbor unavailable: default csbp depends on intra/inter */
        UWORD8 u1_def_csbp = i4_is_intra ? 0xf : 0;

        *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = u1_def_csbp;
        *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = u1_def_csbp;
        *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = u1_def_csbp;

        /* Clear the same sizeof(UWORD32) bytes the former word store cleared, */
        /* without accessing the WORD8 array through a UWORD32 lvalue          */
        memset(ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc, 0, sizeof(UWORD32));
        memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
    }

    if(!i4_top_avail)
    {
        /* top neighbor unavailable: default csbp depends on intra/inter */
        UWORD8 u1_def_csbp = i4_is_intra ? 0xff : 0;

        ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_ac_csbp = u1_def_csbp;
        ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_dc_csbp = u1_def_csbp;

        /* reset the ref idx and mv context of the current column entry */
        ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[0] =
            ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[1] =
                ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[2] =
                    ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[3] = 0;
        memset(ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv, 0, 16);
    }
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @brief
|
||||
* flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402).
|
||||
*
|
||||
* @param[in] ps_cabac_ctxt
|
||||
* pointer to cabac context (handle)
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_cabac_flush(isvce_cabac_ctxt_t *ps_cabac_ctxt)
{
    /* bit stream ptr */
    bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm;
    encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env);
    UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
    UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen;
    UWORD8 *pu1_strm_buf = ps_stream->pu1_strm_buffer;
    UWORD32 u4_strm_buf_offset = ps_stream->u4_strm_buf_offset;
    WORD32 zero_run = ps_stream->i4_zero_bytes_run;
    UWORD32 u4_out_standing_bytes = ps_cab_enc_env->u4_out_standing_bytes;

    /************************************************************************/
    /* Insert the carry (propagated in previous byte) along with            */
    /* outstanding bytes (if any) and flush remaining bits                  */
    /************************************************************************/
    {
        /* carry = 1 => putbit(1); carry propagated due to L renorm */
        WORD32 carry = (u4_low >> (u4_bits_gen + CABAC_BITS)) & 0x1;
        WORD32 last_byte;
        WORD32 bits_left;
        WORD32 rem_bits;

        if(carry)
        {
            /* CORNER CASE: if the previous data is 0x000003, then EPB will be
               inserted and the data will become 0x00000303 and if the carry is
               present, it will be added with the last byte and it will become
               0x00000304 which is not correct as per standard */
            /* so check for previous four bytes and if it is equal to 0x00000303
               then subtract u4_strm_buf_offset by 1 */
            /* The offset is unsigned: only look back four bytes when at least */
            /* four bytes have been written (same guard as in                  */
            /* isvce_cabac_put_byte)                                           */
            if((u4_strm_buf_offset > 3) && (pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03) &&
               (pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03) &&
               (pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00) &&
               (pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00))
            {
                u4_strm_buf_offset -= 1;
            }

            /* previous byte carry add will not result in overflow to */
            /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes */
            /* Only touch the previous byte when one exists */
            if(u4_strm_buf_offset > 0)
            {
                pu1_strm_buf[u4_strm_buf_offset - 1] += carry;
                zero_run = 0;
            }
        }

        /* Insert outstanding bytes (if any): 0x00 with carry, 0xFF without */
        while(u4_out_standing_bytes)
        {
            UWORD8 u1_0_or_ff = carry ? 0 : 0xFF;

            PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run);
            u4_out_standing_bytes--;
        }

        /* clear the carry in low */
        u4_low &= ((1 << (u4_bits_gen + CABAC_BITS)) - 1);

        /* extract the remaining bits; */
        /* includes additional msb bit of low as per Figure 9-12 */
        bits_left = u4_bits_gen + 1;
        rem_bits = (u4_low >> (u4_bits_gen + CABAC_BITS - bits_left));

        if(bits_left >= 8)
        {
            last_byte = (rem_bits >> (bits_left - 8)) & 0xFF;
            PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run);
            bits_left -= 8;
        }

        /* insert last byte along with rbsp stop bit(1) and 0's in the end */
        /* NOTE(review): if bits_left can be 7 here, the shift count          */
        /* (7 - bits_left - 1) is negative, which is undefined - confirm the  */
        /* range of u4_bits_gen at flush time                                 */
        last_byte =
            (rem_bits << (8 - bits_left)) | (1 << (7 - bits_left) | (1 << (7 - bits_left - 1)));
        last_byte &= 0xFF;
        PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run);

        /* update the state variables and return success */
        ps_stream->u4_strm_buf_offset = u4_strm_buf_offset;
        ps_stream->i4_zero_bytes_run = 0;
        /* Default init values for scratch variables of bitstream context */
        ps_stream->u4_cur_word = 0;
        ps_stream->i4_bits_left_in_cw = WORD_SIZE;
    }
}
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Puts new byte (and outstanding bytes) into bitstream after cabac
|
||||
* renormalization
|
||||
*
|
||||
* @par Description
|
||||
* 1. Extract the leading byte of low(L)
|
||||
* 2. If leading byte=0xff increment outstanding bytes and return
|
||||
* (as the actual bits depend on carry propagation later)
|
||||
* 3. If leading byte is not 0xff check for any carry propagation
|
||||
* 4. Insert the carry (propagated in previous byte) along with outstanding
|
||||
* bytes (if any) and leading byte
|
||||
*
|
||||
*
|
||||
* @param[in] ps_cabac_ctxt
|
||||
* pointer to cabac context (handle)
|
||||
*
|
||||
* @return
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_cabac_put_byte(isvce_cabac_ctxt_t *ps_cabac_ctxt)
{
    bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm;
    encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env);
    UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
    UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen;

    /* leading byte of low; bit 8 (if set) is the carry out of that byte */
    WORD32 lead_byte = u4_low >> (u4_bits_gen + CABAC_BITS - 8);

    UWORD8 *pu1_strm_buf;
    UWORD32 u4_strm_buf_offset;
    UWORD32 u4_pending_bytes;
    WORD32 carry;
    WORD32 zero_run;
    UWORD8 u1_0_or_ff;

    /* Sanity checks */
    ASSERT((ps_cab_enc_env->u4_code_int_range >= 256) && (ps_cab_enc_env->u4_code_int_range < 512));
    ASSERT((u4_bits_gen >= 8));

    /* the leading byte has been extracted: drop it from low and bits_gen */
    u4_bits_gen -= 8;
    ps_cab_enc_env->u4_code_int_low &= ((1 << (CABAC_BITS + u4_bits_gen)) - 1);
    ps_cab_enc_env->u4_bits_gen = u4_bits_gen;

    /* A 0xff byte cannot be written yet, since its final value depends on */
    /* a later carry; just count it as outstanding                         */
    if(lead_byte == 0xff)
    {
        ps_cab_enc_env->u4_out_standing_bytes++;
        return;
    }

    pu1_strm_buf = ps_stream->pu1_strm_buffer;
    u4_strm_buf_offset = ps_stream->u4_strm_buf_offset;
    zero_run = ps_stream->i4_zero_bytes_run;
    u4_pending_bytes = ps_cab_enc_env->u4_out_standing_bytes;

    /* carry = 1 => putbit(1); carry propagated due to L renorm */
    carry = (lead_byte >> 8) & 0x1;

    /*********************************************************************/
    /* Add the carry into the byte written previously                    */
    /*                                                                   */
    /* Note : Do not worry about corruption into slice header align byte */
    /*        This is because the first bin cannot result in overflow    */
    /*********************************************************************/
    if(carry)
    {
        /* CORNER CASE: if the previous data was 0x000003, an EPB was inserted
           and the stream ends in 0x00000303; adding the carry to the last byte
           would give 0x00000304, which is not correct as per the standard.
           So if the previous four bytes are 0x00000303, step the offset back
           by one before adding the carry */
        if((u4_strm_buf_offset > 3) && (pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03) &&
           (pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03) &&
           (pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00) &&
           (pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00))
        {
            u4_strm_buf_offset -= 1;
        }

        /* the carry add cannot ripple into u4_strm_buf_offset - 2 because */
        /* 0xff bytes are tracked as outstanding bytes                     */
        if(u4_strm_buf_offset > 0)
        {
            pu1_strm_buf[u4_strm_buf_offset - 1] += carry;
            zero_run = 0;
        }
    }

    /* Outstanding bytes resolve to 0x00 with a carry and to 0xFF without */
    u1_0_or_ff = carry ? 0 : 0xFF;
    for(; u4_pending_bytes; u4_pending_bytes--)
    {
        PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run);
    }
    ps_cab_enc_env->u4_out_standing_bytes = 0;

    /* Insert the leading byte (without its carry bit) */
    lead_byte &= 0xFF;
    PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, lead_byte, zero_run);

    /* write the updated offset and zero run back to the bitstream context */
    ps_stream->u4_strm_buf_offset = u4_strm_buf_offset;
    ps_stream->i4_zero_bytes_run = zero_run;
}
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Codes a bin based on probability and mps packed context model
|
||||
*
|
||||
* @par Description
|
||||
* 1. Apart from encoding bin, context model is updated as per state transition
|
||||
* 2. Range and Low renormalization is done based on bin and original state
|
||||
* 3. After renorm bitstream is updated (if required)
|
||||
*
|
||||
* @param[in] ps_cabac
|
||||
* pointer to cabac context (handle)
|
||||
*
|
||||
* @param[in] bin
|
||||
* bin(boolean) to be encoded
|
||||
*
|
||||
* @param[in] pu1_bin_ctxts
|
||||
* index of cabac context model containing pState[bits 5-0] | MPS[bit6]
|
||||
*
|
||||
* @return
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_cabac_encode_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin, bin_ctxt_model *pu1_bin_ctxts)
|
||||
{
|
||||
encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env);
|
||||
UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range;
|
||||
UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
|
||||
UWORD32 u4_rlps;
|
||||
UWORD8 state_mps = (*pu1_bin_ctxts) & 0x3F;
|
||||
UWORD8 u1_mps = !!((*pu1_bin_ctxts) & (0x40));
|
||||
WORD32 shift;
|
||||
UWORD32 u4_table_val;
|
||||
/* Sanity checks */
|
||||
ASSERT((bin == 0) || (bin == 1));
|
||||
ASSERT((u4_range >= 256) && (u4_range < 512));
|
||||
|
||||
/* Get the lps range from LUT based on quantized range and state */
|
||||
u4_table_val = gau4_isvc_cabac_table[state_mps][(u4_range >> 6) & 0x3];
|
||||
u4_rlps = u4_table_val & 0xFF;
|
||||
u4_range -= u4_rlps;
|
||||
|
||||
/* check if bin is mps or lps */
|
||||
if(u1_mps ^ bin)
|
||||
{
|
||||
/* lps path; L= L + R; R = RLPS */
|
||||
u4_low += u4_range;
|
||||
u4_range = u4_rlps;
|
||||
if(state_mps == 0)
|
||||
{
|
||||
/* MPS(CtxIdx) = 1 - MPS(CtxIdx) */
|
||||
u1_mps = 1 - u1_mps;
|
||||
} /* update the context model from state transition LUT */
|
||||
|
||||
state_mps = (u4_table_val >> 15) & 0x3F;
|
||||
}
|
||||
else
|
||||
{ /* update the context model from state transition LUT */
|
||||
state_mps = (u4_table_val >> 8) & 0x3F;
|
||||
}
|
||||
|
||||
(*pu1_bin_ctxts) = (u1_mps << 6) | state_mps;
|
||||
|
||||
/*****************************************************************/
|
||||
/* Renormalization; calculate bits generated based on range(R) */
|
||||
/* Note : 6 <= R < 512; R is 2 only for terminating encode */
|
||||
/*****************************************************************/
|
||||
GETRANGE(shift, u4_range);
|
||||
shift = 9 - shift;
|
||||
u4_low <<= shift;
|
||||
u4_range <<= shift;
|
||||
|
||||
/* bits to be inserted in the bitstream */
|
||||
ps_cab_enc_env->u4_bits_gen += shift;
|
||||
ps_cab_enc_env->u4_code_int_range = u4_range;
|
||||
ps_cab_enc_env->u4_code_int_low = u4_low;
|
||||
|
||||
/* generate stream when a byte is ready */
|
||||
if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
|
||||
{
|
||||
isvce_cabac_put_byte(ps_cabac);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Encoding process for a binary decision :implements encoding process of a
|
||||
decision
|
||||
* as defined in 9.3.4.2 . This function encodes multiple bins, of a symbol.
|
||||
Implements
|
||||
* flowchart Figure 9-7( ITU_T_H264-201402)
|
||||
*
|
||||
* @param[in] u4_bins
|
||||
* array of bin values
|
||||
*
|
||||
* @param[in] i1_bins_len
|
||||
* Length of bins, maximum 32
|
||||
*
|
||||
* @param[in] u4_ctx_inc
|
||||
* CtxInc, byte0- bin0, byte1-bin1 ..
|
||||
*
|
||||
* @param[in] i1_valid_len
|
||||
* valid length of bins, after that CtxInc is constant
|
||||
*
|
||||
* @param[in] pu1_bin_ctxt_type
|
||||
* Pointer to binary contexts
|
||||
|
||||
* @param[in] ps_cabac
|
||||
* Pointer to cabac_context_structure
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len, UWORD32 u4_ctx_inc,
|
||||
WORD8 i1_valid_len, bin_ctxt_model *pu1_bin_ctxt_type,
|
||||
isvce_cabac_ctxt_t *ps_cabac)
|
||||
{
|
||||
WORD8 i;
|
||||
UWORD8 u1_ctx_inc, u1_bin;
|
||||
|
||||
for(i = 0; i < i1_bins_len; i++)
|
||||
{
|
||||
u1_bin = (u4_bins & 0x01);
|
||||
u4_bins = u4_bins >> 1;
|
||||
u1_ctx_inc = u4_ctx_inc & 0x0f;
|
||||
if(i < i1_valid_len) u4_ctx_inc = u4_ctx_inc >> 4;
|
||||
/* Encode the bin */
|
||||
isvce_cabac_encode_bin(ps_cabac, u1_bin, pu1_bin_ctxt_type + u1_ctx_inc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @brief
|
||||
* Encoding process for a binary decision before termination:Encoding process
|
||||
* of a termination(9.3.4.5 :ITU_T_H264-201402) . Explained in flowchart 9-11.
|
||||
*
|
||||
* @param[in] ps_cabac
|
||||
* Pointer to cabac structure
|
||||
*
|
||||
* @param[in] term_bin
|
||||
* Symbol value, end of slice or not, term_bin is binary
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_cabac_encode_terminate(isvce_cabac_ctxt_t *ps_cabac, WORD32 term_bin)
|
||||
{
|
||||
encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env);
|
||||
|
||||
UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range;
|
||||
UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
|
||||
UWORD32 u4_rlps;
|
||||
WORD32 shift;
|
||||
|
||||
/* Sanity checks */
|
||||
ASSERT((u4_range >= 256) && (u4_range < 512));
|
||||
ASSERT((term_bin == 0) || (term_bin == 1));
|
||||
|
||||
/* term_bin = 1 has lps range = 2 */
|
||||
u4_rlps = 2;
|
||||
u4_range -= u4_rlps;
|
||||
|
||||
/* if terminate L is incremented by curR and R=2 */
|
||||
if(term_bin)
|
||||
{
|
||||
/* lps path; L= L + R; R = RLPS */
|
||||
u4_low += u4_range;
|
||||
u4_range = u4_rlps;
|
||||
}
|
||||
|
||||
/*****************************************************************/
|
||||
/* Renormalization; calculate bits generated based on range(R) */
|
||||
/* Note : 6 <= R < 512; R is 2 only for terminating encode */
|
||||
/*****************************************************************/
|
||||
GETRANGE(shift, u4_range);
|
||||
shift = 9 - shift;
|
||||
u4_low <<= shift;
|
||||
u4_range <<= shift;
|
||||
|
||||
/* bits to be inserted in the bitstream */
|
||||
ps_cab_enc_env->u4_bits_gen += shift;
|
||||
ps_cab_enc_env->u4_code_int_range = u4_range;
|
||||
ps_cab_enc_env->u4_code_int_low = u4_low;
|
||||
|
||||
/* generate stream when a byte is ready */
|
||||
if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
|
||||
{
|
||||
isvce_cabac_put_byte(ps_cabac);
|
||||
}
|
||||
|
||||
if(term_bin)
|
||||
{
|
||||
isvce_cabac_flush(ps_cabac);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @brief
|
||||
* Bypass encoding process for binary decisions: Explained (9.3.4.4
|
||||
*:ITU_T_H264-201402) , flowchart 9-10.
|
||||
*
|
||||
* @param[ino] ps_cabac : pointer to cabac context (handle)
|
||||
*
|
||||
* @param[in] bin : bypass bin(0/1) to be encoded
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvce_cabac_encode_bypass_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin)
|
||||
{
|
||||
encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env);
|
||||
|
||||
UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range;
|
||||
UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
|
||||
|
||||
/* Sanity checks */
|
||||
ASSERT((u4_range >= 256) && (u4_range < 512));
|
||||
ASSERT((bin == 0) || (bin == 1));
|
||||
|
||||
u4_low <<= 1;
|
||||
/* add range if bin is 1 */
|
||||
if(bin)
|
||||
{
|
||||
u4_low += u4_range;
|
||||
}
|
||||
|
||||
/* 1 bit to be inserted in the bitstream */
|
||||
ps_cab_enc_env->u4_bits_gen++;
|
||||
ps_cab_enc_env->u4_code_int_low = u4_low;
|
||||
|
||||
/* generate stream when a byte is ready */
|
||||
if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
|
||||
{
|
||||
isvce_cabac_put_byte(ps_cabac);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Encodes a series of bypass bins (FLC bypass bins)
|
||||
*
|
||||
* @par Description
|
||||
* This function is more optimal than calling isvce_cabac_encode_bypass_bin()
|
||||
* in a loop as cabac low, renorm and generating the stream (8bins at a time)
|
||||
* can be done in one operation
|
||||
*
|
||||
* @param[inout]ps_cabac
|
||||
* pointer to cabac context (handle)
|
||||
*
|
||||
* @param[in] u4_bins
|
||||
* syntax element to be coded (as FLC bins)
|
||||
*
|
||||
* @param[in] num_bins
|
||||
* This is the FLC length for u4_sym
|
||||
*
|
||||
* @return
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
void isvce_cabac_encode_bypass_bins(isvce_cabac_ctxt_t *ps_cabac, UWORD32 u4_bins, WORD32 num_bins)
|
||||
{
|
||||
encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env);
|
||||
|
||||
UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range;
|
||||
WORD32 next_byte;
|
||||
|
||||
/* Sanity checks */
|
||||
ASSERT((num_bins < 33) && (num_bins > 0));
|
||||
ASSERT((u4_range >= 256) && (u4_range < 512));
|
||||
|
||||
/* Compute bit always to populate the trace */
|
||||
/* increment bits generated by num_bins */
|
||||
|
||||
/* Encode 8bins at a time and put in the bit-stream */
|
||||
while(num_bins > 8)
|
||||
{
|
||||
num_bins -= 8;
|
||||
|
||||
next_byte = (u4_bins >> (num_bins)) & 0xff;
|
||||
|
||||
/* L = (L << 8) + (R * next_byte) */
|
||||
ps_cab_enc_env->u4_code_int_low <<= 8;
|
||||
ps_cab_enc_env->u4_code_int_low += (next_byte * u4_range);
|
||||
ps_cab_enc_env->u4_bits_gen += 8;
|
||||
|
||||
if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
|
||||
{
|
||||
/* insert the leading byte of low into stream */
|
||||
isvce_cabac_put_byte(ps_cabac);
|
||||
}
|
||||
}
|
||||
|
||||
/* Update low with remaining bins and return */
|
||||
next_byte = (u4_bins & ((1 << num_bins) - 1));
|
||||
|
||||
ps_cab_enc_env->u4_code_int_low <<= num_bins;
|
||||
ps_cab_enc_env->u4_code_int_low += (next_byte * u4_range);
|
||||
ps_cab_enc_env->u4_bits_gen += num_bins;
|
||||
|
||||
if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
|
||||
{
|
||||
/* insert the leading byte of low into stream */
|
||||
isvce_cabac_put_byte(ps_cabac);
|
||||
}
|
||||
}
|
||||
380
encoder/svc/isvce_cabac.h
Normal file
380
encoder/svc/isvce_cabac.h
Normal file
|
|
@ -0,0 +1,380 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_cabac.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains cabac related macros, enums, tables and function
|
||||
*declarations.
|
||||
*
|
||||
* @author
|
||||
* Doney Alex
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_CABAC_H_
|
||||
#define _ISVCE_CABAC_H_
|
||||
|
||||
#include "ih264e_cabac.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Initialize default context values and pointers.
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* Pointer to entropy context structure
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_cabac_table(isvce_entropy_ctxt_t *ps_ent_ctxt);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Initialize cabac context: Initialize all contexts with init values given in
|
||||
*the spec. Called at the beginning of entropy coding of each slice for CABAC
|
||||
*encoding.
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* Pointer to entropy context structure
*
* @param[in] ps_slice_hdr
* Pointer to the slice header of the slice about to be entropy coded
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
extern void isvce_init_cabac_ctxt(isvce_entropy_ctxt_t *ps_ent_ctxt, slice_header_t *ps_slice_hdr);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* k-th order Exp-Golomb (UEGk) binarization process: Implements concatenated
|
||||
* unary/ k-th order Exp-Golomb (UEGk) binarization process,
|
||||
* where k = 0 as defined in 9.3.2.3 of ITU_T_H264-201402
|
||||
*
|
||||
* @param[in] i2_sufs
|
||||
* Suffix bit string
|
||||
*
|
||||
* @param[in] pi1_bins_len
|
||||
* Pointer to length of the string
|
||||
*
|
||||
* @returns Binarized value
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
UWORD32 isvce_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Get cabac context for the MB :calculates the pointers to Top and left
|
||||
* cabac neighbor context depending upon neighbor availability.
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* Pointer to entropy context structure
|
||||
*
|
||||
* @param[in] u4_mb_type
|
||||
* Type of MB
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_get_cabac_context(isvce_entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @brief
|
||||
* flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402).
|
||||
*
|
||||
* @param[in] ps_cabac_ctxt
|
||||
* pointer to cabac context (handle)
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_cabac_flush(isvce_cabac_ctxt_t *ps_cabac_ctxt);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Puts new byte (and outstanding bytes) into bitstream after cabac
|
||||
* renormalization
|
||||
*
|
||||
* @par Description
|
||||
* 1. Extract the leading byte of low(L)
|
||||
* 2. If leading byte=0xff increment outstanding bytes and return
|
||||
* (as the actual bits depend on carry propagation later)
|
||||
* 3. If leading byte is not 0xff check for any carry propagation
|
||||
* 4. Insert the carry (propagated in previous byte) along with outstanding
|
||||
* bytes (if any) and leading byte
|
||||
*
|
||||
*
|
||||
* @param[inout] ps_cabac_ctxt
|
||||
* pointer to cabac context (handle)
|
||||
*
|
||||
* @return
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_cabac_put_byte(isvce_cabac_ctxt_t *ps_cabac_ctxt);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Codes a bin based on probability and mps packed context model
|
||||
*
|
||||
* @par Description
|
||||
* 1. Apart from encoding bin, context model is updated as per state transition
|
||||
* 2. Range and Low renormalization is done based on bin and original state
|
||||
* 3. After renorm bitstream is updated (if required)
|
||||
*
|
||||
* @param[inout] ps_cabac
|
||||
* pointer to cabac context (handle)
|
||||
*
|
||||
* @param[in] bin
|
||||
* bin(boolean) to be encoded
|
||||
*
|
||||
* @param[in] pu1_bin_ctxts
|
||||
* pointer to cabac context model containing pState[bits 5-0] | MPS[bit6]
|
||||
*
|
||||
* @return
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_cabac_encode_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin,
|
||||
bin_ctxt_model *pu1_bin_ctxts);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Encoding process for a binary decision :implements encoding process of a
|
||||
decision
|
||||
* as defined in 9.3.4.2 . This function encodes multiple bins, of a symbol.
|
||||
Implements
|
||||
* flowchart Figure 9-7( ITU_T_H264-201402)
|
||||
*
|
||||
* @param[in] u4_bins
|
||||
* array of bin values
|
||||
*
|
||||
* @param[in] i1_bins_len
|
||||
* Length of bins, maximum 32
|
||||
*
|
||||
* @param[in] u4_ctx_inc
|
||||
* CtxInc, 4 bits per bin: bits 3-0 for bin0, bits 7-4 for bin1 ..
|
||||
*
|
||||
* @param[in] i1_valid_len
|
||||
* valid length of bins, after that CtxInc is constant
|
||||
*
|
||||
* @param[in] pu1_bin_ctxt_type
|
||||
* Pointer to binary contexts
|
||||
|
||||
* @param[in] ps_cabac
|
||||
* Pointer to cabac_context_structure
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len, UWORD32 u4_ctx_inc,
|
||||
WORD8 i1_valid_len, bin_ctxt_model *pu1_bin_ctxt_type,
|
||||
isvce_cabac_ctxt_t *ps_cabac);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @brief
|
||||
* Encoding process for a binary decision before termination:Encoding process
|
||||
* of a termination(9.3.4.5 :ITU_T_H264-201402) . Explained in flowchart 9-11.
|
||||
*
|
||||
* @param[in] ps_cabac
|
||||
* Pointer to cabac structure
|
||||
*
|
||||
* @param[in] term_bin
|
||||
* Symbol value, end of slice or not, term_bin is binary
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_cabac_encode_terminate(isvce_cabac_ctxt_t *ps_cabac, WORD32 term_bin);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @brief
|
||||
* Bypass encoding process for binary decisions: Explained (9.3.4.4
|
||||
*:ITU_T_H264-201402) , flowchart 9-10.
|
||||
*
|
||||
* @param[in] ps_cabac : pointer to cabac context (handle)
|
||||
*
|
||||
* @param[in] bin : bypass bin(0/1) to be encoded
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvce_cabac_encode_bypass_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Encodes a series of bypass bins (FLC bypass bins)
|
||||
*
|
||||
* @par Description
|
||||
* This function is more optimal than calling isvce_cabac_encode_bypass_bin()
|
||||
* in a loop as cabac low, renorm and generating the stream (8bins at a time)
|
||||
* can be done in one operation
|
||||
*
|
||||
* @param[inout]ps_cabac
|
||||
* pointer to cabac context (handle)
|
||||
*
|
||||
* @param[in] u4_bins
|
||||
* syntax element to be coded (as FLC bins)
|
||||
*
|
||||
* @param[in] num_bins
|
||||
* This is the FLC length for u4_bins
|
||||
*
|
||||
* @return
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
void isvce_cabac_encode_bypass_bins(isvce_cabac_ctxt_t *ps_cabac, UWORD32 u4_bins, WORD32 num_bins);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function generates CABAC coded bit stream for an Intra Slice.
|
||||
*
|
||||
* @description
|
||||
* The mb syntax layer for intra slices constitutes luma mb mode, luma sub
|
||||
*modes (if present), mb qp delta, coded block pattern, chroma mb mode and
|
||||
* luma/chroma residue. These syntax elements are written as directed by table
|
||||
* 7.3.5 of h264 specification.
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* pointer to entropy context
|
||||
*
|
||||
* @returns error code
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_write_islice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function generates CABAC coded bit stream for Inter slices
|
||||
*
|
||||
* @description
|
||||
* The mb syntax layer for inter slices constitutes luma mb mode, luma sub
|
||||
*modes (if present), mb qp delta, coded block pattern, chroma mb mode and
|
||||
* luma/chroma residue. These syntax elements are written as directed by table
|
||||
* 7.3.5 of h264 specification
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* pointer to entropy context
|
||||
*
|
||||
* @returns error code
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_write_pslice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function generates CABAC coded bit stream for B slices
|
||||
*
|
||||
* @description
|
||||
* The mb syntax layer for inter slices constitutes luma mb mode,
|
||||
* mb qp delta, coded block pattern, chroma mb mode and
|
||||
* luma/chroma residue. These syntax elements are written as directed by table
|
||||
* 7.3.5 of h264 specification
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* pointer to entropy context
|
||||
*
|
||||
* @returns error code
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_write_bslice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
|
||||
|
||||
#if ENABLE_RE_ENC_AS_SKIP
|
||||
IH264E_ERROR_T isvce_reencode_as_skip_frame_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
2374
encoder/svc/isvce_cabac_encode.c
Normal file
2374
encoder/svc/isvce_cabac_encode.c
Normal file
File diff suppressed because it is too large
Load diff
215
encoder/svc/isvce_cabac_init.c
Normal file
215
encoder/svc/isvce_cabac_init.c
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_cabac_init.c
|
||||
*
|
||||
* @brief
|
||||
* Contains all initialization functions for cabac contexts
|
||||
*
|
||||
* @author
|
||||
* Doney Alex
|
||||
*
|
||||
* @par List of Functions:
|
||||
*
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* System include files */
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* User include files */
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
#include "ime_defs.h"
|
||||
#include "ime_structs.h"
|
||||
#include "ih264_error.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
#include "isvc_inter_pred_filters.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
#include "ih264_padding.h"
|
||||
#include "ih264_intra_pred_filters.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "ih264_buf_mgr.h"
|
||||
#include "ih264e_error.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "isvc_common_tables.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "ih264_list.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "isvce_rate_control.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvce_cabac.h"
|
||||
#include "isvce_process.h"
|
||||
#include "ithread.h"
|
||||
#include "isvce_encode_header.h"
|
||||
#include "isvce_globals.h"
|
||||
#include "ih264e_config.h"
|
||||
#include "ih264e_trace.h"
|
||||
#include "ih264e_statistics.h"
|
||||
#include "ih264_cavlc_tables.h"
|
||||
#include "isvce_deblk.h"
|
||||
#include "isvce_me.h"
|
||||
#include "ih264e_debug.h"
|
||||
#include "ih264e_master.h"
|
||||
#include "isvce_utils.h"
|
||||
#include "irc_mem_req_and_acq.h"
|
||||
#include "irc_rate_control_api.h"
|
||||
#include "ih264e_platform_macros.h"
|
||||
#include "ime_statistics.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function definitions . */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Initialize cabac encoding environment
|
||||
*
|
||||
* @param[in] ps_cab_enc_env
|
||||
* Pointer to encoding_envirnoment_t structure
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
static void isvce_init_cabac_enc_envirnoment(encoding_envirnoment_t *ps_cab_enc_env)
|
||||
{
|
||||
ps_cab_enc_env->u4_code_int_low = 0;
|
||||
ps_cab_enc_env->u4_code_int_range = 0x1fe;
|
||||
ps_cab_enc_env->u4_out_standing_bytes = 0;
|
||||
ps_cab_enc_env->u4_bits_gen = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Initialize default context values and pointers (Called once at the beginning
|
||||
*of encoding).
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* Pointer to entropy context structure
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_cabac_table(isvce_entropy_ctxt_t *ps_ent_ctxt)
|
||||
{
|
||||
/* CABAC context */
|
||||
isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
|
||||
ps_cabac_ctxt->ps_mb_map_ctxt_inc = ps_cabac_ctxt->ps_mb_map_ctxt_inc_base + 1;
|
||||
ps_cabac_ctxt->ps_lft_csbp = &ps_cabac_ctxt->s_lft_csbp;
|
||||
ps_cabac_ctxt->ps_bitstrm = ps_ent_ctxt->ps_bitstrm;
|
||||
|
||||
{
|
||||
/* 0th entry of mb_map_ctxt_inc will be always be containing default values
|
||||
*/
|
||||
/* for CABAC context representing MB not available */
|
||||
isvce_mb_info_ctxt_t *ps_def_ctxt = ps_cabac_ctxt->ps_mb_map_ctxt_inc - 1;
|
||||
|
||||
ps_def_ctxt->u1_mb_type = CAB_SKIP;
|
||||
ps_def_ctxt->u1_cbp = 0x0f;
|
||||
ps_def_ctxt->u1_intrapred_chroma_mode = 0;
|
||||
ps_def_ctxt->u1_base_mode_flag = 0;
|
||||
|
||||
memset(ps_def_ctxt->i1_ref_idx, 0, sizeof(ps_def_ctxt->i1_ref_idx));
|
||||
memset(ps_def_ctxt->u1_mv, 0, sizeof(ps_def_ctxt->u1_mv));
|
||||
ps_cabac_ctxt->ps_def_ctxt_mb_info = ps_def_ctxt;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Initialize cabac context: Initialize all contest with init values given in
|
||||
*the spec. Called at the beginning of entropy coding of each slice for CABAC
|
||||
*encoding.
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* Pointer to entropy context structure
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_cabac_ctxt(isvce_entropy_ctxt_t *ps_ent_ctxt, slice_header_t *ps_slice_hdr)
|
||||
{
|
||||
isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
|
||||
|
||||
const UWORD8 u1_slice_type = ps_slice_hdr->u1_slice_type;
|
||||
WORD8 i1_cabac_init_idc = 0;
|
||||
bin_ctxt_model *au1_cabac_ctxt_table = ps_cabac_ctxt->au1_cabac_ctxt_table;
|
||||
UWORD8 u1_qp_y = ps_slice_hdr->i1_slice_qp;
|
||||
|
||||
isvce_init_cabac_enc_envirnoment(&ps_cabac_ctxt->s_cab_enc_env);
|
||||
|
||||
ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt = 0;
|
||||
|
||||
if(ISLICE != u1_slice_type)
|
||||
{
|
||||
i1_cabac_init_idc = ps_slice_hdr->i1_cabac_init_idc;
|
||||
}
|
||||
else
|
||||
{
|
||||
i1_cabac_init_idc = 3;
|
||||
}
|
||||
|
||||
memcpy(au1_cabac_ctxt_table, gau1_isvc_cabac_ctxt_init_table[i1_cabac_init_idc][u1_qp_y],
|
||||
NUM_SVC_CABAC_CTXTS * sizeof(bin_ctxt_model));
|
||||
}
|
||||
142
encoder/svc/isvce_cabac_structs.h
Normal file
142
encoder/svc/isvce_cabac_structs.h
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_cabac_structs.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains cabac related structure definitions.
|
||||
*
|
||||
* @author
|
||||
* Doney Alex
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_CABAC_STRUCTS_H_
|
||||
#define _ISVCE_CABAC_STRUCTS_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "ih264e_cabac_structs.h"
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @brief MB info for cabac
|
||||
******************************************************************************
|
||||
*/
|
||||
typedef struct isvce_mb_info_ctxt_t
|
||||
{
|
||||
/* Neighbour availability Variables needed to get CtxtInc, for CABAC */
|
||||
UWORD8 u1_mb_type; /* !< macroblock type: I/P/B/SI/SP */
|
||||
|
||||
UWORD8 u1_cbp; /* !< Coded Block Pattern */
|
||||
UWORD8 u1_intrapred_chroma_mode;
|
||||
|
||||
/*************************************************************************/
|
||||
/* Arrangement of AC CSBP */
|
||||
/* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
|
||||
/* CSBP: V1 V0 U1 U0 Y3 Y2 Y1 Y0 */
|
||||
/*************************************************************************/
|
||||
UWORD8 u1_yuv_ac_csbp;
|
||||
/*************************************************************************/
|
||||
/* Arrangement of DC CSBP */
|
||||
/* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
|
||||
/* CSBP: x x x x x Vdc Udc Ydc */
|
||||
/*************************************************************************/
|
||||
UWORD8 u1_yuv_dc_csbp;
|
||||
|
||||
WORD8 i1_ref_idx[4];
|
||||
UWORD8 u1_mv[4][4];
|
||||
|
||||
UWORD8 u1_base_mode_flag;
|
||||
} isvce_mb_info_ctxt_t;
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @brief CABAC Context structure : Variables to handle Cabac
|
||||
******************************************************************************
|
||||
*/
|
||||
typedef struct isvce_cabac_ctxt_t
|
||||
{
|
||||
/* Table holding all the cabac context models */
|
||||
bin_ctxt_model au1_cabac_ctxt_table[NUM_SVC_CABAC_CTXTS];
|
||||
|
||||
cab_csbp_t s_lft_csbp;
|
||||
|
||||
/**
|
||||
* pointer to Bitstream structure
|
||||
*/
|
||||
bitstrm_t *ps_bitstrm;
|
||||
|
||||
/* Pointer to mb_info_ctxt_t map_base */
|
||||
isvce_mb_info_ctxt_t *ps_mb_map_ctxt_inc_base;
|
||||
|
||||
/* CABAC encoding environment (encoding_envirnoment_t), stored by value */
|
||||
encoding_envirnoment_t s_cab_enc_env;
|
||||
|
||||
/* These things need to be updated at each MbLevel */
|
||||
|
||||
/* Prev ps_mb_qp_delta_ctxt */
|
||||
WORD8 i1_prevps_mb_qp_delta_ctxt;
|
||||
|
||||
/* Pointer to mb_info_ctxt_t map */
|
||||
isvce_mb_info_ctxt_t *ps_mb_map_ctxt_inc;
|
||||
|
||||
/* Pointer to default mb_info_ctxt_t */
|
||||
isvce_mb_info_ctxt_t *ps_def_ctxt_mb_info;
|
||||
|
||||
/* Pointer to current mb_info_ctxt_t */
|
||||
isvce_mb_info_ctxt_t *ps_curr_ctxt_mb_info;
|
||||
|
||||
/* Pointer to left mb_info_ctxt_t */
|
||||
isvce_mb_info_ctxt_t *ps_left_ctxt_mb_info;
|
||||
|
||||
/* Pointer to top mb_info_ctxt_t */
|
||||
isvce_mb_info_ctxt_t *ps_top_ctxt_mb_info;
|
||||
|
||||
/* Pointer to left csbp structure */
|
||||
cab_csbp_t *ps_lft_csbp;
|
||||
UWORD8 *pu1_left_y_ac_csbp;
|
||||
UWORD8 *pu1_left_uv_ac_csbp;
|
||||
UWORD8 *pu1_left_yuv_dc_csbp;
|
||||
|
||||
/***************************************************************************/
|
||||
/* Ref_idx contexts are stored in the following way */
|
||||
/* Array Idx 0,1 for reference indices in Forward direction */
|
||||
/* Array Idx 2,3 for reference indices in backward direction */
|
||||
/***************************************************************************/
|
||||
/* Dimensions of i1_left_ref_idx_ctx_inc_arr are [2][4] for Mbaff: Top and Bot */
|
||||
WORD8 i1_left_ref_idx_ctx_inc_arr[2][4];
|
||||
WORD8 *pi1_left_ref_idx_ctxt_inc;
|
||||
|
||||
/* Dimensions for u1_left_mv_ctxt_inc_arr is [2][4][4] for Mbaff case */
|
||||
UWORD8 u1_left_mv_ctxt_inc_arr[2][4][4];
|
||||
UWORD8 (*pu1_left_mv_ctxt_inc)[4];
|
||||
|
||||
} isvce_cabac_ctxt_t;
|
||||
|
||||
#endif
|
||||
88
encoder/svc/isvce_cabac_utils.h
Normal file
88
encoder/svc/isvce_cabac_utils.h
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_cabac_utils.h
|
||||
*
|
||||
* @brief
|
||||
* Contains function declarations for function declared in
|
||||
* isvce_svc_cabac_utils.c
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_CABAC_UTILS_H_
|
||||
#define _ISVCE_CABAC_UTILS_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_cabac.h"
|
||||
|
||||
static FORCEINLINE void isvce_cabac_enc_base_mode_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt,
|
||||
UWORD8 u1_base_mode_flag)
|
||||
{
|
||||
UWORD8 u1_ctx_inc;
|
||||
UWORD8 u1_a, u1_b;
|
||||
|
||||
const UWORD32 u4_ctxidx_offset = BASE_MODE_FLAG;
|
||||
|
||||
u1_a = !ps_cabac_ctxt->ps_left_ctxt_mb_info->u1_base_mode_flag;
|
||||
u1_b = !ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_base_mode_flag;
|
||||
|
||||
u1_ctx_inc = u1_a + u1_b;
|
||||
|
||||
isvce_cabac_encode_bin(ps_cabac_ctxt, u1_base_mode_flag,
|
||||
ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctxidx_offset + u1_ctx_inc);
|
||||
}
|
||||
|
||||
static FORCEINLINE void isvce_cabac_enc_residual_prediction_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt,
|
||||
UWORD8 u1_base_mode_flag,
|
||||
UWORD8 u1_residual_prediction_flag)
|
||||
{
|
||||
const UWORD32 u4_ctxidx_offset = RESIDUAL_PREDICTION_FLAG;
|
||||
UWORD8 u1_ctx_inc = !u1_base_mode_flag;
|
||||
|
||||
isvce_cabac_encode_bin(ps_cabac_ctxt, u1_residual_prediction_flag,
|
||||
ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctxidx_offset + u1_ctx_inc);
|
||||
}
|
||||
|
||||
static FORCEINLINE void isvce_cabac_enc_motion_prediction_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt,
|
||||
UWORD8 u1_motion_prediction_flag,
|
||||
UWORD8 u1_is_l0_mvp)
|
||||
{
|
||||
const UWORD32 u4_ctxidx_offset =
|
||||
u1_is_l0_mvp ? MOTION_PREDICTION_FLAG_L0 : MOTION_PREDICTION_FLAG_L1;
|
||||
|
||||
isvce_cabac_encode_bin(ps_cabac_ctxt, u1_motion_prediction_flag,
|
||||
ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctxidx_offset);
|
||||
}
|
||||
|
||||
#endif
|
||||
2021
encoder/svc/isvce_cavlc.c
Normal file
2021
encoder/svc/isvce_cavlc.c
Normal file
File diff suppressed because it is too large
Load diff
126
encoder/svc/isvce_cavlc.h
Normal file
126
encoder/svc/isvce_cavlc.h
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file
|
||||
* isvce_cavlc.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains extern declarations of the SVC encoder routines that
|
||||
* write CAVLC coded macroblock data for I, P and B slices
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_CAVLC_H_
|
||||
#define _ISVCE_CAVLC_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function macro definitions */
|
||||
/*****************************************************************************/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Extern Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function generates CAVLC coded bit stream for an Intra Slice.
|
||||
*
|
||||
* @description
|
||||
* The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes
|
||||
* (if present), mb qp delta, coded block pattern, chroma mb mode and
|
||||
* luma/chroma residue. These syntax elements are written as directed by table
|
||||
* 7.3.5 of h264 specification.
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* pointer to entropy context
|
||||
*
|
||||
* @returns error code
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_write_islice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function generates CAVLC coded bit stream for Inter slices
|
||||
*
|
||||
* @description
|
||||
* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
|
||||
* (if present), mb qp delta, coded block pattern, chroma mb mode and
|
||||
* luma/chroma residue. These syntax elements are written as directed by table
|
||||
* 7.3.5 of h264 specification
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* pointer to entropy context
|
||||
*
|
||||
* @returns error code
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_write_pslice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function generates CAVLC coded bit stream for Inter(B) slices
|
||||
*
|
||||
* @description
|
||||
* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
|
||||
* (if present), mb qp delta, coded block pattern, chroma mb mode and
|
||||
* luma/chroma residue. These syntax elements are written as directed by table
|
||||
* 7.3.5 of h264 specification
|
||||
*
|
||||
* @param[in] ps_ent_ctxt
|
||||
* pointer to entropy context
|
||||
*
|
||||
* @returns error code
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_write_bslice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt);
|
||||
|
||||
#if ENABLE_RE_ENC_AS_SKIP
|
||||
IH264E_ERROR_T isvce_reencode_as_skip_frame_cavlc(isvce_entropy_ctxt_t *ps_entropy);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
2367
encoder/svc/isvce_core_coding.c
Normal file
2367
encoder/svc/isvce_core_coding.c
Normal file
File diff suppressed because it is too large
Load diff
125
encoder/svc/isvce_core_coding.h
Normal file
125
encoder/svc/isvce_core_coding.h
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file
|
||||
* isvce_core_coding.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains extern declarations of core coding routines
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_CORE_CODING_H_
|
||||
#define _ISVCE_CORE_CODING_H_
|
||||
|
||||
#include "isvce_structs.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Constant Macros */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @brief Enable/Disable Hadamard transform of DC Coeff's
|
||||
******************************************************************************
|
||||
*/
|
||||
#define DISABLE_DC_TRANSFORM 0
|
||||
#define ENABLE_DC_TRANSFORM 1
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @brief bit masks for DC and AC control flags
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#define DC_COEFF_CNT_LUMA_MB 16
|
||||
#define NUM_4X4_BLKS_LUMA_MB_ROW 4
|
||||
#define NUM_LUMA4x4_BLOCKS_IN_MB 16
|
||||
#define NUM_CHROMA4x4_BLOCKS_IN_MB 8
|
||||
|
||||
#define SIZE_4X4_BLK_HRZ TRANS_SIZE_4
|
||||
#define SIZE_4X4_BLK_VERT TRANS_SIZE_4
|
||||
|
||||
#define CNTRL_FLAG_DC_MASK_LUMA 0x0000FFFF
|
||||
#define CNTRL_FLAG_AC_MASK_LUMA 0xFFFF0000
|
||||
|
||||
#define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000
|
||||
#define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000
|
||||
|
||||
#define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000
|
||||
#define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00
|
||||
|
||||
#define CNTRL_FLAG_AC_MASK_CHROMA (CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V)
|
||||
#define CNTRL_FLAG_DC_MASK_CHROMA (CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V)
|
||||
|
||||
#define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @brief macros for transforms
|
||||
*******************************************************************************
|
||||
*/
|
||||
/* Stores CLZ(u4_cntrl) in blk_lin_id and then masks u4_cntrl with              */
/* 0x7FFFFFFF >> blk_lin_id. Both arguments must be modifiable lvalues.         */
/* NOTE(review): assuming CLZ returns the number of leading zero bits of a      */
/* 32 bit value, this dequeues the most significant set bit; u4_cntrl is        */
/* presumably non-zero at every call site (shift count would be 32) - confirm.  */
/* The expansion is a plain compound statement without a trailing ';' so that   */
/* the macro can be used as the body of an if that has an else branch.          */
#define DEQUEUE_BLKID_FROM_CONTROL(u4_cntrl, blk_lin_id) \
    {                                                     \
        (blk_lin_id) = CLZ(u4_cntrl);                     \
        (u4_cntrl) &= (0x7FFFFFFF >> (blk_lin_id));       \
    }
|
||||
|
||||
/* Converts a linear 4x4 block index within a luma MB (raster order, 4 blocks   */
/* per row) into offsets: x = (id % 4) * 4 and y = (id / 4) * 4.                */
/* Arguments are parenthesized so that expression arguments such as 'base + 1'  */
/* are evaluated as a whole; the two outputs must be modifiable lvalues.        */
#define IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y) \
    {                                                         \
        (i4_offset_x) = ((u4_blk_id) % 4) << 2;               \
        (i4_offset_y) = ((u4_blk_id) / 4) << 2;               \
    }
|
||||
|
||||
/* Non-zero for chroma block indices above 3 (the second group of four blocks) */
#define IS_V_BLK(u4_blk_id) ((u4_blk_id) > 3)

/* Converts a chroma 4x4 block index into offsets:                              */
/*   x = (bit0 of id) * 8, plus 1 when IS_V_BLK(id) is true                     */
/*   y = (bit1 of id) * 4                                                       */
/* NOTE(review): the 8 sample x step and the +1 for V blocks presumably reflect */
/* an interleaved UV buffer layout - confirm against the callers.               */
/* Arguments are parenthesized so that expression arguments such as 'base + 1'  */
/* are evaluated as a whole; the two outputs must be modifiable lvalues.        */
#define IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y)            \
    {                                                                      \
        (i4_offset_x) = (((u4_blk_id) & 0x1) << 3) + IS_V_BLK(u4_blk_id);  \
        (i4_offset_y) = ((u4_blk_id) & 0x2) << 1;                          \
    }
|
||||
|
||||
/* Typedefs */
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
extern FT_CORE_CODING isvce_code_luma_intra_macroblock_16x16;
|
||||
|
||||
extern FT_CORE_CODING isvce_code_luma_intra_macroblock_4x4;
|
||||
|
||||
extern FT_CORE_CODING isvce_code_luma_intra_macroblock_4x4_rdopt_on;
|
||||
|
||||
extern FT_CORE_CODING isvce_code_chroma_intra_macroblock_8x8;
|
||||
|
||||
extern FT_CORE_CODING isvce_code_luma_inter_macroblock_16x16;
|
||||
|
||||
extern FT_CORE_CODING isvce_code_chroma_inter_macroblock_8x8;
|
||||
|
||||
#endif
|
||||
1267
encoder/svc/isvce_deblk.c
Normal file
1267
encoder/svc/isvce_deblk.c
Normal file
File diff suppressed because it is too large
Load diff
53
encoder/svc/isvce_deblk.h
Normal file
53
encoder/svc/isvce_deblk.h
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file
|
||||
* isvce_deblk.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains extern declarations of deblocking routines
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_DEBLK_H_
|
||||
#define _ISVCE_DEBLK_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
#define CSBP_LEFT_BLOCK_MASK 0x1111
|
||||
#define CSBP_RIGHT_BLOCK_MASK 0x8888
|
||||
|
||||
#define NUM_EDGES_IN_MB 4
|
||||
|
||||
extern void isvce_compute_bs(isvce_process_ctxt_t *ps_proc, UWORD8 u1_inter_layer_deblk_flag);
|
||||
|
||||
extern void isvce_deblock_mb(isvce_process_ctxt_t *ps_proc, isvce_deblk_ctxt_t *ps_deblk,
|
||||
UWORD8 u1_inter_layer_deblk_flag);
|
||||
|
||||
#endif
|
||||
345
encoder/svc/isvce_defs.h
Normal file
345
encoder/svc/isvce_defs.h
Normal file
|
|
@ -0,0 +1,345 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_defs.h
|
||||
*
|
||||
* @brief
|
||||
* Definitions used in the encoder
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_DEFS_H_
|
||||
#define _ISVCE_DEFS_H_
|
||||
|
||||
#include "ih264e_defs.h"
|
||||
|
||||
#define SVC_MAX_NUM_BFRAMES 0
|
||||
|
||||
#define DEFAULT_INIT_QP 1
|
||||
|
||||
#define SVC_MAX_NUM_INP_FRAMES ((SVC_MAX_NUM_BFRAMES) + 2)
|
||||
|
||||
#define LOG2_MAX_FRAME_NUM_MINUS4 12
|
||||
|
||||
#define ENC_MAX_PU_IN_MB ((MB_SIZE / ENC_MIN_PU_SIZE) * (MB_SIZE / ENC_MIN_PU_SIZE))
|
||||
|
||||
#define MAX_REF_FRAMES_PER_PRED_DIR 1
|
||||
|
||||
#define SVC_MAX_SLICE_HDR_CNT 1
|
||||
|
||||
#define MAX_LAYER_REFERENCE_PICS 1
|
||||
|
||||
#define ENABLE_RESIDUAL_PREDICTION 1
|
||||
|
||||
#define ENABLE_ILP_MV 1
|
||||
|
||||
#define USE_ILP_MV_IN_ME (1 && (ENABLE_ILP_MV))
|
||||
|
||||
#define USE_ILP_MV_AS_MVP (1 && (ENABLE_ILP_MV))
|
||||
|
||||
#define MAX_MVP_IDX (USE_ILP_MV_AS_MVP ? 1 : 0)
|
||||
|
||||
#define ENABLE_IBL_MODE 1
|
||||
|
||||
#define ENABLE_INTRA_BASE_DEBLOCK (0 && (ENABLE_IBL_MODE))
|
||||
|
||||
#define ENABLE_MODE_STAT_VISUALISER 0
|
||||
|
||||
#define FORCE_FAST_INTRA4X4 0
|
||||
|
||||
#define FORCE_DISTORTION_BASED_INTRA_4X4_GATING 1
|
||||
|
||||
#define ENABLE_INTRA16X16_BASED_INTRA4X4_GATING 0
|
||||
|
||||
#define ENABLE_ILP_BASED_INTRA4X4_GATING 0
|
||||
|
||||
#define DISABLE_POST_ENC_SKIP 1
|
||||
|
||||
#define ENABLE_RE_ENC_AS_SKIP 1
|
||||
|
||||
#define MAX_ILP_MV_IN_NBR_RGN 4
|
||||
|
||||
/* L, T, TL, TR, Zero, Skip, 'Temporal Skip', ILP */
|
||||
#define MAX_FPEL_SEARCH_CANDIDATES (7 + MAX_PU_IN_MB + MAX_ILP_MV_IN_NBR_RGN)
|
||||
|
||||
#define NUM_SVCE_RC_MEMTABS 45
|
||||
|
||||
#define SVCE_MAX_INP_DIM 1920
|
||||
|
||||
#define SVCE_MAX_INP_FRAME_SIZE (1920 * 1088)
|
||||
|
||||
/**
|
||||
***************************************************************************
|
||||
* Enum to hold various mem records being requested
|
||||
****************************************************************************
|
||||
*/
|
||||
typedef enum ISVCE_MEMREC_TYPES_T
|
||||
{
|
||||
/**
|
||||
* Codec Object at API level
|
||||
*/
|
||||
ISVCE_MEM_REC_IV_OBJ,
|
||||
|
||||
/**
|
||||
* Codec context
|
||||
*/
|
||||
ISVCE_MEM_REC_CODEC,
|
||||
|
||||
/**
|
||||
* Cabac context
|
||||
*/
|
||||
ISVCE_MEM_REC_CABAC,
|
||||
|
||||
/**
|
||||
* Cabac context_mb_info
|
||||
*/
|
||||
ISVCE_MEM_REC_CABAC_MB_INFO,
|
||||
|
||||
/**
|
||||
* entropy context
|
||||
*/
|
||||
ISVCE_MEM_REC_ENTROPY,
|
||||
|
||||
/**
|
||||
* Buffer to hold coeff data
|
||||
*/
|
||||
ISVCE_MEM_REC_MB_COEFF_DATA,
|
||||
|
||||
/**
|
||||
* Buffer to hold MB header data
|
||||
*/
|
||||
ISVCE_MEM_REC_MB_HEADER_DATA,
|
||||
|
||||
/**
|
||||
* Motion vector bank
|
||||
*/
|
||||
ISVCE_MEM_REC_MVBANK,
|
||||
|
||||
/**
|
||||
* Motion vector bits
|
||||
*/
|
||||
ISVCE_MEM_REC_MVBITS,
|
||||
|
||||
/**
|
||||
* Holds mem records passed to the codec.
|
||||
*/
|
||||
ISVCE_MEM_REC_BACKUP,
|
||||
|
||||
/**
|
||||
* Holds SPS
|
||||
*/
|
||||
ISVCE_MEM_REC_SPS,
|
||||
|
||||
/**
|
||||
* Holds PPS
|
||||
*/
|
||||
ISVCE_MEM_REC_PPS,
|
||||
|
||||
/**
|
||||
* Holds SVC NALU Extension data
|
||||
*/
|
||||
ISVCE_MEM_REC_SVC_NALU_EXT,
|
||||
|
||||
/**
|
||||
* Holds subset SPS data
|
||||
*/
|
||||
ISVCE_MEM_REC_SUBSET_SPS,
|
||||
|
||||
/**
|
||||
* Holds Slice Headers
|
||||
*/
|
||||
ISVCE_MEM_REC_SLICE_HDR,
|
||||
|
||||
/**
|
||||
* Holds SVC Slice Headers
|
||||
*/
|
||||
ISVCE_MEM_REC_SVC_SLICE_HDR,
|
||||
|
||||
/**
|
||||
* Contains map indicating slice index per MB basis
|
||||
*/
|
||||
ISVCE_MEM_REC_SLICE_MAP,
|
||||
|
||||
/**
|
||||
* Holds thread handles
|
||||
*/
|
||||
ISVCE_MEM_REC_THREAD_HANDLE,
|
||||
|
||||
/**
|
||||
* Holds control call mutex
|
||||
*/
|
||||
ISVCE_MEM_REC_CTL_MUTEX,
|
||||
|
||||
/**
|
||||
* Holds entropy call mutex
|
||||
*/
|
||||
ISVCE_MEM_REC_ENTROPY_MUTEX,
|
||||
|
||||
/**
|
||||
* Holds memory for Process JOB Queue
|
||||
*/
|
||||
ISVCE_MEM_REC_PROC_JOBQ,
|
||||
|
||||
/**
|
||||
* Holds memory for Entropy JOB Queue
|
||||
*/
|
||||
ISVCE_MEM_REC_ENTROPY_JOBQ,
|
||||
|
||||
/**
|
||||
* Contains status map indicating processing status per MB basis
|
||||
*/
|
||||
ISVCE_MEM_REC_PROC_MAP,
|
||||
|
||||
/**
|
||||
* Contains status map indicating deblocking status per MB basis
|
||||
*/
|
||||
ISVCE_MEM_REC_DBLK_MAP,
|
||||
|
||||
/*
|
||||
* Contains AIR map and mask
|
||||
*/
|
||||
ISVCE_MEM_REC_AIR_MAP,
|
||||
|
||||
/**
|
||||
* Contains status map indicating ME status per MB basis
|
||||
*/
|
||||
ISVCE_MEM_REC_ME_MAP,
|
||||
|
||||
/**
|
||||
* Holds dpb manager context
|
||||
*/
|
||||
ISVCE_MEM_REC_DPB_MGR,
|
||||
|
||||
/**
|
||||
* Holds intermediate buffers needed during processing stage
|
||||
* Memory for process contexts is allocated in this memtab
|
||||
*/
|
||||
ISVCE_MEM_REC_PROC_SCRATCH,
|
||||
|
||||
/**
|
||||
* Holds buffers for vert_bs, horz_bs and QP (all frame level)
|
||||
*/
|
||||
ISVCE_MEM_REC_QUANT_PARAM,
|
||||
|
||||
/**
|
||||
* Holds top row syntax information
|
||||
*/
|
||||
ISVCE_MEM_REC_TOP_ROW_SYN_INFO,
|
||||
|
||||
/**
|
||||
* Holds buffers for vert_bs, horz_bs and QP (all frame level)
|
||||
*/
|
||||
ISVCE_MEM_REC_BS_QP,
|
||||
|
||||
/**
|
||||
* Holds input buffer manager context
|
||||
*/
|
||||
ISVCE_MEM_REC_INP_PIC,
|
||||
|
||||
/**
|
||||
* Holds output buffer manager context
|
||||
*/
|
||||
ISVCE_MEM_REC_OUT,
|
||||
|
||||
/**
|
||||
* Holds picture buffer manager context and array of pic_buf_ts
|
||||
* Also holds reference picture buffers in non-shared mode
|
||||
*/
|
||||
ISVCE_MEM_REC_REF_PIC,
|
||||
|
||||
/*
|
||||
* Mem record for color space conversion
|
||||
*/
|
||||
ISVCE_MEM_REC_CSC,
|
||||
|
||||
/**
|
||||
* NMB info struct
|
||||
*/
|
||||
ISVCE_MEM_REC_MB_INFO_NMB,
|
||||
|
||||
/**
|
||||
* SVC Spatial layer Inputs
|
||||
*/
|
||||
ISVCE_MEM_SVC_SPAT_INP,
|
||||
|
||||
/**
|
||||
* Downscaler memory records
|
||||
*/
|
||||
ISVCE_MEM_DOWN_SCALER,
|
||||
|
||||
/**
|
||||
* SVC ILP data
|
||||
*/
|
||||
ISVCE_MEM_SVC_ILP_DATA,
|
||||
|
||||
/**
|
||||
* SVC ILP MV Context
|
||||
*/
|
||||
ISVCE_MEM_SVC_ILP_MV_CTXT,
|
||||
|
||||
/**
|
||||
* SVC ResPred Context
|
||||
*/
|
||||
ISVCE_MEM_SVC_RES_PRED_CTXT,
|
||||
|
||||
/**
|
||||
* SVC inter-layer intra pred context
|
||||
*/
|
||||
ISVCE_MEM_SVC_INTRA_PRED_CTXT,
|
||||
|
||||
/**
|
||||
* RC Utils Context
|
||||
*/
|
||||
ISVCE_MEM_SVC_RC_UTILS_CTXT,
|
||||
|
||||
/**
|
||||
* SubPic RC Context
|
||||
*/
|
||||
ISVCE_MEM_SVC_SUB_PIC_RC_CTXT,
|
||||
|
||||
#if ENABLE_MODE_STAT_VISUALISER
|
||||
ISVCE_MEM_MODE_STAT_VISUALISER_BUF,
|
||||
#endif
|
||||
|
||||
/**
|
||||
* First of the NUM_SVCE_RC_MEMTABS rate control memory records.
|
||||
*/
|
||||
ISVCE_MEM_REC_RC,
|
||||
|
||||
/**
|
||||
* Place holder to compute number of memory records.
|
||||
*/
|
||||
ISVCE_MEM_REC_CNT = ISVCE_MEM_REC_RC + NUM_SVCE_RC_MEMTABS,
|
||||
|
||||
/*
|
||||
* Do not add anything below
|
||||
*/
|
||||
} ISVCE_MEMREC_TYPES_T;
|
||||
|
||||
#endif
|
||||
537
encoder/svc/isvce_downscaler.c
Normal file
537
encoder/svc/isvce_downscaler.c
Normal file
|
|
@ -0,0 +1,537 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_downscaler.c
|
||||
*
|
||||
* @brief
|
||||
* Contains downscaler functions required by the SVC encoder
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_get_downscaler_data_size()
|
||||
* - isvce_get_downscaler_padding_dims()
|
||||
* - isvce_get_downscaler_normalized_filtered_pixel()
|
||||
* - isvce_horizontal_downscale_and_transpose()
|
||||
* - isvce_process_downscaler()
|
||||
* - isvce_initialize_downscaler()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* system include files */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_macros.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "iv2.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_downscaler.h"
|
||||
#include "isvce_downscaler_private_defs.h"
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @brief lanczos filter coefficients for 2x downscaling
|
||||
* @remarks Though the length of the filter is 8, the
|
||||
* same coefficients
|
||||
* are replicated so that 2 rows can be processed at one
|
||||
* go in SIMD
|
||||
******************************************************************************
|
||||
*/
|
||||
static WORD8 gai1_lanczos_coefficients_2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] = {
|
||||
{-7, 0, 39, 64, 39, 0, -7, 0, -7, 0, 39, 64, 39, 0, -7, 0},
|
||||
{-6, 0, 33, 62, 41, 4, -6, 0, -6, 0, 33, 62, 41, 4, -6, 0},
|
||||
{-5, -1, 29, 57, 45, 9, -5, -1, -5, -1, 29, 57, 45, 9, -5, -1},
|
||||
{-4, -2, 23, 55, 48, 14, -4, -2, -4, -2, 23, 55, 48, 14, -4, -2},
|
||||
{-3, -3, 18, 52, 52, 18, -3, -3, -3, -3, 18, 52, 52, 18, -3, -3},
|
||||
{-2, -4, 13, 49, 54, 24, -2, -4, -2, -4, 13, 49, 54, 24, -2, -4},
|
||||
{-1, -5, 9, 44, 58, 29, -1, -5, -1, -5, 9, 44, 58, 29, -1, -5},
|
||||
{0, -6, 3, 42, 61, 34, 0, -6, 0, -6, 3, 42, 61, 34, 0, -6}};
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @brief lanczos filter coefficients for 1.5x downscaling
|
||||
* @remarks Though the length of the filter is 8, the same coefficients
|
||||
* are replicated so that 2 rows can be processed at one go in SIMD.
|
||||
******************************************************************************
|
||||
*/
|
||||
static WORD8 gai1_lanczos_coefficients_3by2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] =
|
||||
{{0, -11, 32, 86, 32, -11, 0, 0, 0, -11, 32, 86, 32, -11, 0, 0},
|
||||
{0, -10, 26, 79, 39, -5, 0, 0, 0, -10, 26, 79, 39, -5, 0, 0},
|
||||
{0, -8, 21, 72, 46, 0, -2, 0, 0, -8, 21, 72, 46, 0, -2, 0},
|
||||
{0, -6, 15, 66, 52, 3, -3, 0, 0, -6, 15, 66, 52, 3, -3, 0},
|
||||
{0, -6, 10, 60, 60, 10, -6, 0, 0, -6, 10, 60, 60, 10, -6, 0},
|
||||
{0, -3, 3, 52, 66, 15, -6, 0, 0, -3, 3, 52, 66, 15, -6, 0},
|
||||
{0, -2, 0, 46, 72, 21, -8, 0, 0, -2, 0, 46, 72, 21, -8, 0},
|
||||
{0, 0, -5, 39, 79, 26, -10, 0, 0, 0, -5, 39, 79, 26, -10, 0}};
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* gets the memory size required for downscaler
|
||||
*
|
||||
* @par Description:
|
||||
* returns the memory required by the downscaler context and state structs
|
||||
* for allocation.
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor,
|
||||
UWORD32 u4_width, UWORD32 u4_height)
|
||||
{
|
||||
UWORD32 u4_size = 0;
|
||||
|
||||
if(u1_num_spatial_layers > 1)
|
||||
{
|
||||
u4_size += sizeof(downscaler_state_t);
|
||||
|
||||
u4_size +=
|
||||
(u4_height + NUM_SCALER_FILTER_TAPS * 2) * ((UWORD32) (u4_width / d_scaling_factor));
|
||||
}
|
||||
|
||||
return u4_size;
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* gets the padding size required for filtering
|
||||
*
|
||||
* @par Description:
|
||||
* gets the padding size required for filtering
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims)
|
||||
{
|
||||
ps_pad_dims->u1_left_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);
|
||||
ps_pad_dims->u1_right_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);
|
||||
ps_pad_dims->u1_top_pad_size = NUM_SCALER_FILTER_TAPS / 2;
|
||||
ps_pad_dims->u1_bottom_pad_size = NUM_SCALER_FILTER_TAPS / 2;
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* processes downscaler
|
||||
*
|
||||
* @par Description:
|
||||
* calls the function for padding and scaling
|
||||
*
|
||||
* @param[in] ps_scaler
|
||||
* pointer to downdownscaler context
|
||||
*
|
||||
* @param[in] ps_src_buf_props
|
||||
* pointer to source buffer props struct
|
||||
*
|
||||
* @param[in] u4_blk_wd
|
||||
* width of the block to be processed
|
||||
*
|
||||
* @param[in] u4_blk_ht
|
||||
* height of the block to be processed
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler, yuv_buf_props_t *ps_src_buf_props,
|
||||
yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd,
|
||||
UWORD32 u4_blk_ht)
|
||||
{
|
||||
buffer_container_t s_src_buf;
|
||||
buffer_container_t s_dst_buf;
|
||||
|
||||
UWORD32 u4_scaled_block_size_x, u4_scaled_block_size_y;
|
||||
|
||||
downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
|
||||
|
||||
ASSERT(ps_src_buf_props->e_color_format == IV_YUV_420SP_UV);
|
||||
|
||||
u4_scaled_block_size_x = (UWORD32) (u4_blk_wd / ps_scaler->d_scaling_factor);
|
||||
u4_scaled_block_size_y = (UWORD32) (u4_blk_ht / ps_scaler->d_scaling_factor);
|
||||
|
||||
/* luma */
|
||||
s_src_buf = ps_src_buf_props->as_component_bufs[Y];
|
||||
s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - (NUM_SCALER_FILTER_TAPS / 2) -
|
||||
(NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride;
|
||||
|
||||
s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf;
|
||||
s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS;
|
||||
|
||||
ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
|
||||
u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 0);
|
||||
|
||||
s_src_buf = s_dst_buf;
|
||||
s_dst_buf = ps_dst_buf_props->as_component_bufs[Y];
|
||||
|
||||
ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
|
||||
u4_scaled_block_size_y, u4_scaled_block_size_x, 0);
|
||||
|
||||
/* chroma */
|
||||
u4_blk_ht /= 2;
|
||||
u4_scaled_block_size_y /= 2;
|
||||
|
||||
s_src_buf = ps_src_buf_props->as_component_bufs[U];
|
||||
s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - NUM_SCALER_FILTER_TAPS -
|
||||
(NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride;
|
||||
|
||||
s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf;
|
||||
s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS;
|
||||
|
||||
ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
|
||||
u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 1);
|
||||
|
||||
s_src_buf = s_dst_buf;
|
||||
s_dst_buf = ps_dst_buf_props->as_component_bufs[U];
|
||||
|
||||
ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
|
||||
u4_scaled_block_size_y, u4_scaled_block_size_x, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* normalized dot product computer for downscaler
|
||||
*
|
||||
* @par Description:
|
||||
* Given the downscaler filter coefficients, source buffer, the function
|
||||
* calculates the dot product between them, adds an offset and normalizes it
|
||||
*
|
||||
* @param[in] ps_scaler
|
||||
* pointer to src buf
|
||||
*
|
||||
* @param[in] pi1_filter
|
||||
* pointer to filter coefficients
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
static UWORD8 isvce_get_downscaler_normalized_filtered_pixel(UWORD8 *pu1_src, WORD8 *pi1_filter)
|
||||
{
|
||||
WORD32 i;
|
||||
WORD32 i4_norm_dot_product;
|
||||
UWORD8 u1_out_pixel;
|
||||
WORD32 i4_dot_product_sum = 0;
|
||||
WORD32 i4_rounding_offset = 1 << (FILTER_COEFF_Q - 1);
|
||||
WORD32 i4_normalizing_factor = 1 << FILTER_COEFF_Q;
|
||||
|
||||
for(i = 0; i < NUM_SCALER_FILTER_TAPS; i++)
|
||||
{
|
||||
i4_dot_product_sum += (pu1_src[i] * pi1_filter[i]);
|
||||
}
|
||||
|
||||
i4_norm_dot_product = ((i4_dot_product_sum + i4_rounding_offset) / i4_normalizing_factor);
|
||||
u1_out_pixel = (UWORD8) CLIP_U8(i4_norm_dot_product);
|
||||
|
||||
return u1_out_pixel;
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* horizontal scaler function
|
||||
*
|
||||
* @par Description:
|
||||
* Does horizontal scaling for the given block
|
||||
*
|
||||
* @param[in] ps_scaler
|
||||
* pointer to downscaler context
|
||||
*
|
||||
* @param[in] ps_src
|
||||
* pointer to source buffer container
|
||||
*
|
||||
* @param[in] ps_dst
|
||||
* pointer to destination buffer container
|
||||
*
|
||||
* @param[in] pai1_filters
|
||||
* pointer to array of downscaler filters
|
||||
*
|
||||
* @param[in] u4_blk_wd
|
||||
* width of the block after horizontal scaling (output block width)
|
||||
*
|
||||
* @param[in] u4_blk_ht
|
||||
* height of the current block (input block height)
|
||||
*
|
||||
* @param[in] u1_is_chroma
|
||||
* flag suggesting whether the buffer is luma or chroma
|
||||
*
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* The same function is used for vertical scaling too as
|
||||
* the horizontally scaled input in stored in transpose fashion.
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
static void isvce_horizontal_downscale_and_transpose(
|
||||
downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst,
|
||||
FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma)
|
||||
{
|
||||
WORD32 i, j, k;
|
||||
UWORD8 u1_phase;
|
||||
UWORD8 u1_filtered_out_pixel;
|
||||
UWORD8 *pu1_src_j, *pu1_dst_j;
|
||||
UWORD8 u1_filtered_out_u_pixel, u1_filtered_out_v_pixel;
|
||||
UWORD8 *pu1_in_pixel;
|
||||
UWORD8 *pu1_out_pixel;
|
||||
WORD8 *pi1_filter_grid;
|
||||
UWORD16 u2_full_pixel_inc;
|
||||
UWORD8 au1_temp_u_buff[NUM_SCALER_FILTER_TAPS];
|
||||
UWORD8 au1_temp_v_buff[NUM_SCALER_FILTER_TAPS];
|
||||
|
||||
downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
|
||||
|
||||
UWORD32 u4_center_pixel_pos = ps_scaler_state->i4_init_offset;
|
||||
UWORD32 u4_src_horz_increments = ps_scaler_state->u4_horz_increment;
|
||||
UWORD8 *pu1_src = ps_src->pv_data;
|
||||
UWORD32 u4_in_stride = ps_src->i4_data_stride;
|
||||
UWORD8 *pu1_dst = ps_dst->pv_data;
|
||||
UWORD32 u4_out_stride = ps_dst->i4_data_stride;
|
||||
UWORD32 u4_center_pixel_pos_src = u4_center_pixel_pos;
|
||||
|
||||
/* Offset the input so that the input pixel to be processed
|
||||
co-incides with the centre of filter (4th coefficient)*/
|
||||
pu1_src += (1 + u1_is_chroma);
|
||||
|
||||
ASSERT((1 << DOWNSCALER_Q) == ps_scaler_state->u4_vert_increment);
|
||||
|
||||
if(!u1_is_chroma)
|
||||
{
|
||||
for(j = 0; j < (WORD32) u4_blk_ht; j++)
|
||||
{
|
||||
pu1_src_j = pu1_src + (j * u4_in_stride);
|
||||
pu1_dst_j = pu1_dst + j;
|
||||
|
||||
u4_center_pixel_pos = u4_center_pixel_pos_src;
|
||||
|
||||
for(i = 0; i < (WORD32) u4_blk_wd; i++)
|
||||
{
|
||||
u1_phase = get_filter_phase(u4_center_pixel_pos);
|
||||
pi1_filter_grid = pai1_filters[u1_phase];
|
||||
|
||||
/* Doing the Calculation for current Loop Count */
|
||||
u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
|
||||
pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
|
||||
pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
|
||||
|
||||
u1_filtered_out_pixel =
|
||||
isvce_get_downscaler_normalized_filtered_pixel(pu1_in_pixel, pi1_filter_grid);
|
||||
*pu1_out_pixel = u1_filtered_out_pixel;
|
||||
|
||||
/* Update the context for next Loop Count */
|
||||
u4_center_pixel_pos += u4_src_horz_increments;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(j = 0; j < (WORD32) u4_blk_ht; j++)
|
||||
{
|
||||
pu1_src_j = pu1_src + (j * u4_in_stride);
|
||||
pu1_dst_j = pu1_dst + j;
|
||||
|
||||
u4_center_pixel_pos = u4_center_pixel_pos_src;
|
||||
|
||||
for(i = 0; i < (WORD32) u4_blk_wd; i++)
|
||||
{
|
||||
u1_phase = get_filter_phase(u4_center_pixel_pos);
|
||||
pi1_filter_grid = pai1_filters[u1_phase];
|
||||
|
||||
/*Doing the Calculation for current Loop Count */
|
||||
u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
|
||||
pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
|
||||
pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
|
||||
|
||||
for(k = 0; k < NUM_SCALER_FILTER_TAPS; k++)
|
||||
{
|
||||
au1_temp_u_buff[k] = *(pu1_in_pixel + (2 * k));
|
||||
au1_temp_v_buff[k] = *(pu1_in_pixel + ((2 * k) + 1));
|
||||
}
|
||||
|
||||
u1_filtered_out_u_pixel = isvce_get_downscaler_normalized_filtered_pixel(
|
||||
au1_temp_u_buff, pi1_filter_grid);
|
||||
u1_filtered_out_v_pixel = isvce_get_downscaler_normalized_filtered_pixel(
|
||||
au1_temp_v_buff, pi1_filter_grid);
|
||||
*pu1_out_pixel = u1_filtered_out_u_pixel;
|
||||
*(pu1_out_pixel + u4_out_stride) = u1_filtered_out_v_pixel;
|
||||
|
||||
/* Update the context for next Loop Count */
|
||||
u4_center_pixel_pos += u4_src_horz_increments;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
*******************************************************************************
*
* @brief
*  selects the leaf level downscaler function
*
* @par Description:
*  Sets pf_downscaler of the downscaler state to the SSE4.2 or NEON
*  implementation when the build and the given architecture allow it, and to
*  the C implementation otherwise.
*
* @param[out] ps_scaler_state
*  pointer to downscaler state
*
* @param[in] e_arch
*  architecture type
*
* @returns none
*
* @remarks
*
*******************************************************************************
*/

void isvce_downscaler_function_selector(downscaler_state_t *ps_scaler_state, IV_ARCH_T e_arch)
{
    /* C implementation, unless overridden below */
    FT_DOWNSCALER *pf_selected = isvce_horizontal_downscale_and_transpose;

    switch(e_arch)
    {
#if defined(X86)
        case ARCH_X86_SSE42:
        {
            pf_selected = isvce_horizontal_downscale_and_transpose_sse42;

            break;
        }
#elif defined(ARMV8)
        case ARCH_ARM_A53:
        case ARCH_ARM_A57:
        case ARCH_ARM_V8_NEON:
        {
            pf_selected = isvce_horizontal_downscale_and_transpose_neon;

            break;
        }
#elif !defined(DISABLE_NEON)
        case ARCH_ARM_A9Q:
        case ARCH_ARM_A9A:
        case ARCH_ARM_A9:
        case ARCH_ARM_A7:
        case ARCH_ARM_A5:
        case ARCH_ARM_A15:
        {
            pf_selected = isvce_horizontal_downscale_and_transpose_neon;

            break;
        }
#endif
        default:
        {
            break;
        }
    }

    ps_scaler_state->pf_downscaler = pf_selected;
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* initializes the downscaler context
|
||||
*
|
||||
* @par Description:
|
||||
* initializes the downscaler context for the given scaling factor
|
||||
* with padding size, filter size, etc.
|
||||
*
|
||||
* @param[in] ps_scaler
|
||||
* pointer downscaler context
|
||||
*
|
||||
* @param[in] ps_mem_rec
|
||||
* pointer to memory allocated to downscaler process
|
||||
*
|
||||
* @param[in] d_scaling_factor
|
||||
* scaling reatio of width/ height between two consecutive SVC layers
|
||||
*
|
||||
* @param[in] u1_num_spatial_layers
|
||||
* scaling reatio of width/ height between two consecutive SVC layers
|
||||
*
|
||||
* @param[in] u4_wd
|
||||
* width of the input
|
||||
*
|
||||
* @param[in] u4_ht
|
||||
* height of the input
|
||||
*
|
||||
* @param[in] e_arch
|
||||
* architecure type
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* when ARM intrinsics are added, update should be done here
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec,
|
||||
DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers,
|
||||
UWORD32 u4_in_width, UWORD32 u4_in_height, IV_ARCH_T e_arch)
|
||||
{
|
||||
if(u1_num_spatial_layers > 1)
|
||||
{
|
||||
downscaler_state_t *ps_scaler_state;
|
||||
|
||||
UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
|
||||
|
||||
ps_scaler_state = (downscaler_state_t *) pu1_buf;
|
||||
pu1_buf += sizeof(ps_scaler_state[0]);
|
||||
|
||||
ps_scaler_state->pv_scratch_buf = pu1_buf;
|
||||
ps_scaler_state->u4_in_wd = u4_in_width;
|
||||
ps_scaler_state->u4_in_ht = u4_in_height;
|
||||
|
||||
ps_scaler->pv_scaler_state = ps_scaler_state;
|
||||
ps_scaler->d_scaling_factor = d_scaling_factor;
|
||||
ps_scaler->u1_num_spatial_layers = u1_num_spatial_layers;
|
||||
|
||||
isvce_downscaler_function_selector(ps_scaler_state, e_arch);
|
||||
|
||||
ps_scaler_state->u4_horz_increment = (UWORD32) (d_scaling_factor * (1 << DOWNSCALER_Q));
|
||||
|
||||
ps_scaler_state->u4_vert_increment = (1 << DOWNSCALER_Q);
|
||||
ps_scaler_state->i4_init_offset = 0;
|
||||
ps_scaler_state->pai1_filters = (d_scaling_factor == 2.0) ? gai1_lanczos_coefficients_2x
|
||||
: gai1_lanczos_coefficients_3by2x;
|
||||
}
|
||||
}
|
||||
205
encoder/svc/isvce_downscaler.h
Normal file
205
encoder/svc/isvce_downscaler.h
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_downscaler.h
|
||||
*
|
||||
* @brief
|
||||
* Contains downscaler functions required by the SVC encoder
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_get_downscaler_data_size()
|
||||
* - isvce_get_downscaler_padding_dims()
|
||||
* - isvce_isvce_process_ctxt_t_downscaler()
|
||||
* - isvce_get_downscaler_normalized_filtered_pixel()
|
||||
* - isvce_horizontal_downscale_and_transpose()
|
||||
* - isvce_process_downscaler()
|
||||
* - isvce_initialize_downscaler()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_DOWNSCALER_H_
|
||||
#define _ISVCE_DOWNSCALER_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_defs.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/**
|
||||
* pointer to the state of downscaler
|
||||
*/
|
||||
void *pv_scaler_state;
|
||||
|
||||
/**
|
||||
* scaling factor between the dimensions of two consecutive SVC layers
|
||||
*/
|
||||
DOUBLE d_scaling_factor;
|
||||
|
||||
/**
|
||||
* Num spatial layers
|
||||
*/
|
||||
UWORD8 u1_num_spatial_layers;
|
||||
|
||||
} downscaler_ctxt_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UWORD8 u1_left_pad_size;
|
||||
|
||||
UWORD8 u1_right_pad_size;
|
||||
|
||||
UWORD8 u1_top_pad_size;
|
||||
|
||||
UWORD8 u1_bottom_pad_size;
|
||||
|
||||
} padding_dims_t;
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* initializes the downscaler context
|
||||
*
|
||||
* @par Description:
|
||||
* initializes the downscaler context for the given scaling factor
|
||||
* with padding size, filter size, etc.
|
||||
*
|
||||
* @param[in] ps_scaler
|
||||
* pointer downscaler context
|
||||
*
|
||||
* @param[in] ps_mem_rec
|
||||
* pointer to memory allocated to downscaler process
|
||||
*
|
||||
* @param[in] d_scaling_factor
|
||||
* scaling ratio of width/height between two consecutive SVC layers
|
||||
*
|
||||
* @param[in] u1_num_spatial_layers
|
||||
* number of spatial layers
|
||||
*
|
||||
* @param[in] u4_in_width
|
||||
* width of the input
|
||||
*
|
||||
* @param[in] u4_in_height
|
||||
* height of the input
|
||||
*
|
||||
* @param[in] e_arch
|
||||
* architecture type
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* when ARM intrinsics are added, update should be done here
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
extern void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec,
|
||||
DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers,
|
||||
UWORD32 u4_in_width, UWORD32 u4_in_height,
|
||||
IV_ARCH_T e_arch);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* gets the memory size required for downscaler
|
||||
*
|
||||
* @par Description:
|
||||
* returns the memory required by the downscaler context and state structs
|
||||
* for allocation.
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
extern UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor,
|
||||
UWORD32 u4_width, UWORD32 u4_height);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* processes downscaler
|
||||
*
|
||||
* @par Description:
|
||||
* calls the function for padding and scaling
|
||||
*
|
||||
* @param[in] ps_scaler
|
||||
* pointer to downscaler context
|
||||
*
|
||||
* @param[in] ps_src_buf_props
|
||||
* pointer to source buffer props struct
|
||||
*
|
||||
* @param[in] u4_blk_wd
|
||||
* width of the block to be processed
|
||||
*
|
||||
* @param[in] u4_blk_ht
|
||||
* height of the block to be processed
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
extern void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler,
|
||||
yuv_buf_props_t *ps_src_buf_props,
|
||||
yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd,
|
||||
UWORD32 u4_blk_ht);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* gets the padding size required for filtering
|
||||
*
|
||||
* @par Description:
|
||||
* gets the padding size required for filtering
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
extern void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims);
|
||||
|
||||
#endif
|
||||
124
encoder/svc/isvce_downscaler_private_defs.h
Normal file
124
encoder/svc/isvce_downscaler_private_defs.h
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_DOWNSCALER_PRIVATE_DEFS_H_
|
||||
#define _ISVCE_DOWNSCALER_PRIVATE_DEFS_H_
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_downscaler.h"
|
||||
|
||||
/* Macros */
|
||||
#define DOWNSCALER_Q 16
|
||||
|
||||
#define FILTER_COEFF_Q 7
|
||||
|
||||
#define NUM_SCALER_FILTER_TAPS 8
|
||||
|
||||
#define NUM_SCALER_FILTER_PHASES 8
|
||||
|
||||
/* Typedefs */
|
||||
typedef WORD8 (*FILTER_COEFF_ARRAY)[NUM_SCALER_FILTER_TAPS * 2];
|
||||
|
||||
typedef void FT_DOWNSCALER(downscaler_ctxt_t *ps_scaler_state, buffer_container_t *ps_src,
|
||||
buffer_container_t *ps_dst, FILTER_COEFF_ARRAY pai1_filters,
|
||||
UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma);
|
||||
|
||||
/* Structs */
|
||||
typedef struct
|
||||
{
|
||||
/**
|
||||
* pointer to scratch buf
|
||||
*/
|
||||
void *pv_scratch_buf;
|
||||
|
||||
/**
|
||||
* initial offset while calculating input pixel location
|
||||
*/
|
||||
WORD32 i4_init_offset;
|
||||
|
||||
/**
|
||||
* increment to the centre pixel in horizontal direction
|
||||
*/
|
||||
UWORD32 u4_horz_increment;
|
||||
|
||||
/**
|
||||
* increment to the centre pixel in vertical direction
|
||||
*/
|
||||
UWORD32 u4_vert_increment;
|
||||
|
||||
/**
|
||||
* pointer to the filter coefficients
|
||||
*/
|
||||
FILTER_COEFF_ARRAY pai1_filters;
|
||||
|
||||
/**
|
||||
* function pointer to the leaf level function for horizontal scaling
|
||||
*/
|
||||
FT_DOWNSCALER *pf_downscaler;
|
||||
|
||||
/**
|
||||
* width of the input (highest SVC layer)
|
||||
*/
|
||||
UWORD32 u4_in_wd;
|
||||
|
||||
/**
|
||||
* height of the input (highest SVC layer)
|
||||
*/
|
||||
UWORD32 u4_in_ht;
|
||||
|
||||
} downscaler_state_t;
|
||||
|
||||
/**
 * Returns the filter phase for a pixel position given in Q(DOWNSCALER_Q)
 * fixed point.
 *
 * The fractional part of the position is binned into
 * NUM_SCALER_FILTER_PHASES equal-width bins; the index of the bin that
 * contains it is the phase, in the range [0, NUM_SCALER_FILTER_PHASES - 1].
 */
static FORCEINLINE UWORD32 get_filter_phase(UWORD32 u4_center_pixel_pos)
{
    /* width of one phase bin in Q(DOWNSCALER_Q) units. The bins are of */
    /* exactly equal width as the number of phases divides the range    */
    const UWORD32 u4_phase_bin_width = (1 << DOWNSCALER_Q) / NUM_SCALER_FILTER_PHASES;

    /* only the fractional part of the position selects the phase */
    const UWORD32 u4_frac_pos = u4_center_pixel_pos % (1 << DOWNSCALER_Q);

    ASSERT(NUM_SCALER_FILTER_PHASES == 8);

    return u4_frac_pos / u4_phase_bin_width;
}
|
||||
|
||||
/* SSE42 Declarations */
|
||||
extern FT_DOWNSCALER isvce_horizontal_downscale_and_transpose_sse42;
|
||||
|
||||
/* NEON Declarations */
|
||||
extern FT_DOWNSCALER isvce_horizontal_downscale_and_transpose_neon;
|
||||
|
||||
#endif
|
||||
790
encoder/svc/isvce_encode.c
Normal file
790
encoder/svc/isvce_encode.c
Normal file
|
|
@ -0,0 +1,790 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file
|
||||
* isvce_encode.c
|
||||
*
|
||||
* @brief
|
||||
* This file contains functions for encoding the input yuv frame in synchronous
|
||||
* api mode
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* List of Functions
|
||||
* - isvce_join_threads()
|
||||
* - isvce_wait_for_thread()
|
||||
* - isvce_encode()
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
/* Dependencies of ih264_buf_mgr.h */
|
||||
/* Dependencies of ih264_list.h */
|
||||
#include "ih264_error.h"
|
||||
/* Dependencies of ih264_common_tables.h */
|
||||
#include "ih264_defs.h"
|
||||
#include "ih264_structs.h"
|
||||
#include "ih264_buf_mgr.h"
|
||||
#include "ih264_common_tables.h"
|
||||
#include "ih264_list.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "ih264_trans_data.h"
|
||||
#include "ih264_size_defs.h"
|
||||
/* Dependencies of ih264e_cabac_structs.h */
|
||||
#include "ih264_cabac_tables.h"
|
||||
/* Dependencies of ime_structs.h */
|
||||
#include "ime_defs.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
/* Dependencies of ih264e_structs.h */
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "ih264_defs.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "ih264_inter_pred_filters.h"
|
||||
#include "ih264_structs.h"
|
||||
#include "ih264_trans_quant_itrans_iquant.h"
|
||||
/* Dependencies of ih264e_bitstream.h */
|
||||
#include "ih264e_error.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "ih264e_cabac_structs.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "ime_statistics.h"
|
||||
#include "ime_structs.h"
|
||||
/* Dependencies of 'ih264e_utils.h' */
|
||||
#include "ih264e_defs.h"
|
||||
#include "ih264e_structs.h"
|
||||
#include "ih264e_utils.h"
|
||||
#include "ime.h"
|
||||
#include "isvce.h"
|
||||
#include "isvce_cabac.h"
|
||||
#include "isvce_deblk.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "isvce_downscaler.h"
|
||||
#include "isvce_encode_header.h"
|
||||
#include "isvce_fmt_conv.h"
|
||||
#include "isvce_ibl_eval.h"
|
||||
#include "isvce_ilp_mv.h"
|
||||
#include "isvce_intra_modes_eval.h"
|
||||
#include "isvce_me.h"
|
||||
#include "isvce_process.h"
|
||||
#include "isvce_rate_control.h"
|
||||
#include "isvce_residual_pred.h"
|
||||
#include "isvce_sub_pic_rc.h"
|
||||
#include "isvce_utils.h"
|
||||
|
||||
#define SEI_BASED_FORCE_IDR 1
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Definitions */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief This function puts the current thread to sleep for a duration
|
||||
* of sleep_us
|
||||
*
|
||||
* @par Description
|
||||
* ithread_yield() method causes the calling thread to yield execution to
|
||||
*another thread that is ready to run on the current processor. The operating
|
||||
*system selects the thread to yield to. ithread_usleep blocks the current thread
|
||||
*for the specified number of milliseconds. In other words, yield just says, end
|
||||
*my timeslice prematurely, look around for other threads to run. If there is
|
||||
*nothing better than me, continue. Sleep says I don't want to run for x
|
||||
* milliseconds. Even if no other thread wants to run, don't make me run.
|
||||
*
|
||||
* @param[in] sleep_us
|
||||
* thread sleep duration
|
||||
*
|
||||
* @returns error_status
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_wait_for_thread(UWORD32 sleep_us)
|
||||
{
|
||||
/* yield thread */
|
||||
ithread_yield();
|
||||
|
||||
/* put thread to sleep */
|
||||
ithread_sleep(sleep_us);
|
||||
|
||||
return IH264E_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Encodes in synchronous api mode
|
||||
*
|
||||
* @par Description
|
||||
* This routine processes input yuv, encodes it and outputs bitstream and recon
|
||||
*
|
||||
* @param[in] ps_codec_obj
|
||||
* Pointer to codec object at API level
|
||||
*
|
||||
* @param[in] pv_api_ip
|
||||
* Pointer to input argument structure
|
||||
*
|
||||
* @param[out] pv_api_op
|
||||
* Pointer to output argument structure
|
||||
*
|
||||
* @returns Status
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
|
||||
{
|
||||
/* error status */
|
||||
IH264E_ERROR_T error_status = IH264E_SUCCESS;
|
||||
|
||||
/* codec ctxt */
|
||||
isvce_codec_t *ps_codec = (isvce_codec_t *) ps_codec_obj->pv_codec_handle;
|
||||
|
||||
/* input frame to encode */
|
||||
isvce_video_encode_ip_t *ps_video_encode_ip = pv_api_ip;
|
||||
|
||||
/* output buffer to write stream */
|
||||
isvce_video_encode_op_t *ps_video_encode_op = pv_api_op;
|
||||
|
||||
/* i/o structures */
|
||||
isvce_inp_buf_t s_inp_buf;
|
||||
isvce_out_buf_t s_out_buf;
|
||||
|
||||
WORD32 ctxt_sel = 0, i4_rc_pre_enc_skip;
|
||||
WORD32 i, j;
|
||||
|
||||
ASSERT(MAX_CTXT_SETS == 1);
|
||||
|
||||
/********************************************************************/
|
||||
/* BEGIN INIT */
|
||||
/********************************************************************/
|
||||
/* reset output structure */
|
||||
ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
|
||||
ps_video_encode_op->s_ive_op.output_present = 0;
|
||||
ps_video_encode_op->s_ive_op.dump_recon = 0;
|
||||
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
|
||||
|
||||
/* Check for output memory allocation size */
|
||||
{
|
||||
UWORD32 u4_min_bufsize =
|
||||
MIN_STREAM_SIZE * ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;
|
||||
UWORD32 u4_bufsize_per_layer = ps_video_encode_ip->s_ive_ip.s_out_buf.u4_bufsize /
|
||||
ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;
|
||||
|
||||
if(ps_video_encode_ip->s_ive_ip.s_out_buf.u4_bufsize < u4_min_bufsize)
|
||||
{
|
||||
error_status = IH264E_INSUFFICIENT_OUTPUT_BUFFER;
|
||||
|
||||
SET_ERROR_ON_RETURN(error_status, IVE_UNSUPPORTEDPARAM,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
}
|
||||
|
||||
for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
|
||||
{
|
||||
s_out_buf.as_bits_buf[i] = ps_video_encode_ip->s_ive_ip.s_out_buf;
|
||||
|
||||
s_out_buf.as_bits_buf[i].u4_bufsize = u4_bufsize_per_layer;
|
||||
s_out_buf.as_bits_buf[i].pv_buf =
|
||||
((UWORD8 *) ps_video_encode_ip->s_ive_ip.s_out_buf.pv_buf) +
|
||||
u4_bufsize_per_layer * i;
|
||||
}
|
||||
}
|
||||
|
||||
s_out_buf.u4_is_last = 0;
|
||||
s_out_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
|
||||
s_out_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
|
||||
|
||||
/* api call cnt */
|
||||
ps_codec->i4_encode_api_call_cnt += 1;
|
||||
|
||||
/* codec context selector */
|
||||
ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
|
||||
|
||||
/* reset status flags */
|
||||
ps_codec->ai4_pic_cnt[ctxt_sel] = -1;
|
||||
ps_codec->s_rate_control.post_encode_skip[ctxt_sel] = 0;
|
||||
ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = 0;
|
||||
|
||||
/* pass output buffer to codec */
|
||||
ps_codec->as_out_buf[ctxt_sel] = s_out_buf;
|
||||
|
||||
/* initialize codec ctxt with default params for the first encode api call */
|
||||
if(ps_codec->i4_encode_api_call_cnt == 0)
|
||||
{
|
||||
isvce_codec_init(ps_codec);
|
||||
}
|
||||
|
||||
/* parse configuration params */
|
||||
for(i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++)
|
||||
{
|
||||
isvce_cfg_params_t *ps_cfg = &ps_codec->as_cfg[i];
|
||||
|
||||
if(1 == ps_cfg->u4_is_valid)
|
||||
{
|
||||
if(((ps_cfg->u4_timestamp_high == ps_video_encode_ip->s_ive_ip.u4_timestamp_high) &&
|
||||
(ps_cfg->u4_timestamp_low == ps_video_encode_ip->s_ive_ip.u4_timestamp_low)) ||
|
||||
((WORD32) ps_cfg->u4_timestamp_high == -1) ||
|
||||
((WORD32) ps_cfg->u4_timestamp_low == -1))
|
||||
{
|
||||
error_status = isvce_codec_update_config(ps_codec, ps_cfg);
|
||||
SET_ERROR_ON_RETURN(error_status, IVE_UNSUPPORTEDPARAM,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
|
||||
ps_cfg->u4_is_valid = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Force IDR based on SEI params */
|
||||
#if SEI_BASED_FORCE_IDR
|
||||
{
|
||||
sei_mdcv_params_t *ps_sei_mdcv_params = &ps_codec->s_sei.s_sei_mdcv_params;
|
||||
sei_mdcv_params_t *ps_cfg_sei_mdcv_params = &ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
|
||||
sei_cll_params_t *ps_sei_cll_params = &ps_codec->s_sei.s_sei_cll_params;
|
||||
sei_cll_params_t *ps_cfg_sei_cll_params = &ps_codec->s_cfg.s_sei.s_sei_cll_params;
|
||||
sei_ave_params_t *ps_sei_ave_params = &ps_codec->s_sei.s_sei_ave_params;
|
||||
sei_ave_params_t *ps_cfg_sei_ave_params = &ps_codec->s_cfg.s_sei.s_sei_ave_params;
|
||||
|
||||
if((ps_sei_mdcv_params->au2_display_primaries_x[0] !=
|
||||
ps_cfg_sei_mdcv_params->au2_display_primaries_x[0]) ||
|
||||
(ps_sei_mdcv_params->au2_display_primaries_x[1] !=
|
||||
ps_cfg_sei_mdcv_params->au2_display_primaries_x[1]) ||
|
||||
(ps_sei_mdcv_params->au2_display_primaries_x[2] !=
|
||||
ps_cfg_sei_mdcv_params->au2_display_primaries_x[2]) ||
|
||||
(ps_sei_mdcv_params->au2_display_primaries_y[0] !=
|
||||
ps_cfg_sei_mdcv_params->au2_display_primaries_y[0]) ||
|
||||
(ps_sei_mdcv_params->au2_display_primaries_y[1] !=
|
||||
ps_cfg_sei_mdcv_params->au2_display_primaries_y[1]) ||
|
||||
(ps_sei_mdcv_params->au2_display_primaries_y[2] !=
|
||||
ps_cfg_sei_mdcv_params->au2_display_primaries_y[2]) ||
|
||||
(ps_sei_mdcv_params->u2_white_point_x != ps_cfg_sei_mdcv_params->u2_white_point_x) ||
|
||||
(ps_sei_mdcv_params->u2_white_point_y != ps_cfg_sei_mdcv_params->u2_white_point_y) ||
|
||||
(ps_sei_mdcv_params->u4_max_display_mastering_luminance !=
|
||||
ps_cfg_sei_mdcv_params->u4_max_display_mastering_luminance) ||
|
||||
(ps_sei_mdcv_params->u4_min_display_mastering_luminance !=
|
||||
ps_cfg_sei_mdcv_params->u4_min_display_mastering_luminance))
|
||||
{
|
||||
ps_codec->s_sei.s_sei_mdcv_params = ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
|
||||
ps_codec->s_sei.u1_sei_mdcv_params_present_flag = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_codec->s_sei.u1_sei_mdcv_params_present_flag = 0;
|
||||
}
|
||||
|
||||
if((ps_sei_cll_params->u2_max_content_light_level !=
|
||||
ps_cfg_sei_cll_params->u2_max_content_light_level) ||
|
||||
(ps_sei_cll_params->u2_max_pic_average_light_level !=
|
||||
ps_cfg_sei_cll_params->u2_max_pic_average_light_level))
|
||||
{
|
||||
ps_codec->s_sei.s_sei_cll_params = ps_codec->s_cfg.s_sei.s_sei_cll_params;
|
||||
ps_codec->s_sei.u1_sei_cll_params_present_flag = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_codec->s_sei.u1_sei_cll_params_present_flag = 0;
|
||||
}
|
||||
|
||||
if((ps_sei_ave_params->u4_ambient_illuminance !=
|
||||
ps_cfg_sei_ave_params->u4_ambient_illuminance) ||
|
||||
(ps_sei_ave_params->u2_ambient_light_x != ps_cfg_sei_ave_params->u2_ambient_light_x) ||
|
||||
(ps_sei_ave_params->u2_ambient_light_y != ps_cfg_sei_ave_params->u2_ambient_light_y))
|
||||
{
|
||||
ps_codec->s_sei.s_sei_ave_params = ps_codec->s_cfg.s_sei.s_sei_ave_params;
|
||||
ps_codec->s_sei.u1_sei_ave_params_present_flag = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_codec->s_sei.u1_sei_ave_params_present_flag = 0;
|
||||
}
|
||||
|
||||
if((1 == ps_codec->s_sei.u1_sei_mdcv_params_present_flag) ||
|
||||
(1 == ps_codec->s_sei.u1_sei_cll_params_present_flag) ||
|
||||
(1 == ps_codec->s_sei.u1_sei_ave_params_present_flag))
|
||||
{
|
||||
ps_codec->force_curr_frame_type = IV_IDR_FRAME;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* In case of alt ref and B pics we will have non reference frame in stream */
|
||||
if(ps_codec->s_cfg.u4_enable_alt_ref || ps_codec->s_cfg.u4_num_bframes)
|
||||
{
|
||||
ps_codec->i4_non_ref_frames_in_stream = 1;
|
||||
}
|
||||
|
||||
if(ps_codec->i4_encode_api_call_cnt == 0)
|
||||
{
|
||||
/********************************************************************/
|
||||
/* number of mv/ref bank buffers used by the codec, */
|
||||
/* 1 to handle curr frame */
|
||||
/* 1 to store information of ref frame */
|
||||
/* 1 more additional because of the codec employs 2 ctxt sets */
|
||||
/* to assist asynchronous API */
|
||||
/********************************************************************/
|
||||
|
||||
/* initialize mv bank buffer manager */
|
||||
error_status = isvce_svc_au_data_mgr_add_bufs(ps_codec);
|
||||
|
||||
SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
|
||||
/* initialize ref bank buffer manager */
|
||||
error_status = isvce_svc_au_buf_mgr_add_bufs(ps_codec);
|
||||
|
||||
SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
|
||||
/* for the first frame, generate header when not requested explicitly */
|
||||
if(ps_codec->i4_header_mode == 0 && ps_codec->u4_header_generated == 0)
|
||||
{
|
||||
ps_codec->i4_gen_header = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* generate header and return when encoder is operated in header mode */
|
||||
if(ps_codec->i4_header_mode == 1)
|
||||
{
|
||||
/* whenever the header is generated, this implies a start of sequence
|
||||
* and a sequence needs to be started with IDR
|
||||
*/
|
||||
ps_codec->force_curr_frame_type = IV_IDR_FRAME;
|
||||
|
||||
s_inp_buf.s_svc_params = ps_codec->s_cfg.s_svc_params;
|
||||
s_inp_buf.s_inp_props.s_raw_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf;
|
||||
s_inp_buf.s_inp_props.s_raw_buf.au4_wd[Y] = ps_codec->s_cfg.u4_wd;
|
||||
s_inp_buf.s_inp_props.s_raw_buf.au4_ht[Y] = ps_codec->s_cfg.u4_ht;
|
||||
|
||||
isvce_init_svc_dimension(&s_inp_buf);
|
||||
|
||||
/* generate header */
|
||||
error_status = isvce_generate_sps_pps(ps_codec, &s_inp_buf);
|
||||
|
||||
/* send the input to app */
|
||||
ps_video_encode_op->s_ive_op.s_inp_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf;
|
||||
ps_video_encode_op->s_ive_op.u4_timestamp_low =
|
||||
ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
|
||||
ps_video_encode_op->s_ive_op.u4_timestamp_high =
|
||||
ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
|
||||
|
||||
ps_video_encode_op->s_ive_op.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
|
||||
|
||||
/* send the output to app */
|
||||
ps_video_encode_op->s_ive_op.output_present = 1;
|
||||
ps_video_encode_op->s_ive_op.dump_recon = 0;
|
||||
ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].as_bits_buf[0];
|
||||
|
||||
for(i = 1; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
|
||||
{
|
||||
memmove(((UWORD8 *) ps_video_encode_op->s_ive_op.s_out_buf.pv_buf +
|
||||
ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes),
|
||||
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].pv_buf,
|
||||
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes);
|
||||
|
||||
ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes +=
|
||||
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes;
|
||||
}
|
||||
|
||||
/* error status */
|
||||
SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
|
||||
/* indicates that header has been generated previously */
|
||||
ps_codec->u4_header_generated = 1;
|
||||
|
||||
/* api call cnt */
|
||||
ps_codec->i4_encode_api_call_cnt--;
|
||||
|
||||
/* header mode tag is not sticky */
|
||||
ps_codec->i4_header_mode = 0;
|
||||
ps_codec->i4_gen_header = 0;
|
||||
|
||||
return IV_SUCCESS;
|
||||
}
|
||||
|
||||
/* curr pic cnt */
|
||||
ps_codec->i4_pic_cnt += 1;
|
||||
|
||||
i4_rc_pre_enc_skip = 0;
|
||||
for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
|
||||
{
|
||||
i4_rc_pre_enc_skip =
|
||||
isvce_input_queue_update(ps_codec, &ps_video_encode_ip->s_ive_ip, &s_inp_buf, i);
|
||||
}
|
||||
|
||||
s_out_buf.u4_is_last = s_inp_buf.s_inp_props.u4_is_last;
|
||||
ps_video_encode_op->s_ive_op.u4_is_last = s_inp_buf.s_inp_props.u4_is_last;
|
||||
|
||||
/* Only encode if the current frame is not pre-encode skip */
|
||||
if(!i4_rc_pre_enc_skip && s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0])
|
||||
{
|
||||
isvce_process_ctxt_t *ps_proc = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS];
|
||||
|
||||
WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1;
|
||||
|
||||
ps_codec->ai4_pic_cnt[ctxt_sel] = ps_codec->i4_pic_cnt;
|
||||
|
||||
error_status = isvce_svc_au_init(ps_codec, &s_inp_buf);
|
||||
|
||||
SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
|
||||
isvce_nalu_info_au_init(ps_codec->as_nalu_descriptors,
|
||||
ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers);
|
||||
|
||||
#if ENABLE_MODE_STAT_VISUALISER
|
||||
isvce_msv_get_input_frame(ps_codec->ps_mode_stat_visualiser, &s_inp_buf);
|
||||
#endif
|
||||
|
||||
for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
|
||||
{
|
||||
isvce_svc_layer_pic_init(ps_codec, &s_inp_buf, i);
|
||||
|
||||
for(j = 0; j < num_thread_cnt; j++)
|
||||
{
|
||||
ithread_create(ps_codec->apv_proc_thread_handle[j], NULL, isvce_process_thread,
|
||||
&ps_codec->as_process[j + 1]);
|
||||
|
||||
ps_codec->ai4_process_thread_created[j] = 1;
|
||||
|
||||
ps_codec->i4_proc_thread_cnt++;
|
||||
}
|
||||
|
||||
/* launch job */
|
||||
isvce_process_thread(ps_proc);
|
||||
|
||||
/* Join threads at the end of encoding a frame */
|
||||
isvce_join_threads(ps_codec);
|
||||
|
||||
ih264_list_reset(ps_codec->pv_proc_jobq);
|
||||
|
||||
ih264_list_reset(ps_codec->pv_entropy_jobq);
|
||||
}
|
||||
|
||||
#if ENABLE_MODE_STAT_VISUALISER
|
||||
isvce_msv_dump_visualisation(ps_codec->ps_mode_stat_visualiser);
|
||||
#endif
|
||||
|
||||
isvce_sub_pic_rc_dump_data(ps_codec->as_process->ps_sub_pic_rc_ctxt);
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
* RECON
|
||||
* Since we have forward dependent frames, we cannot return recon in
|
||||
*encoding order. It must be in poc order, or input pic order. To achieve this
|
||||
*we introduce a delay of 1 to the recon wrt encode. Now since we have that
|
||||
* delay, at any point minimum of pic_cnt in our ref buffer will be the
|
||||
* correct frame. For ex let our GOP be IBBP [1 2 3 4] . The encode order
|
||||
* will be [1 4 2 3] .Now since we have a delay of 1, when we are done with
|
||||
* encoding 4, the min in the list will be 1. After encoding 2, it will be
|
||||
* 2, 3 after 3 and 4 after 4. Hence we can return in sequence. Note
|
||||
* that the 1 delay is critical. Hence if we have post enc skip, we must
|
||||
* skip here too. Note that since post enc skip already frees the recon
|
||||
* buffer we need not do any thing here
|
||||
*
|
||||
* We need to return a recon when ever we consume an input buffer. This
|
||||
* comsumption include a pre or post enc skip. Thus dump recon is set for
|
||||
* all cases except when
|
||||
* 1) We are waiting -> ps_codec->i4_pic_cnt >
|
||||
*ps_codec->s_cfg.u4_num_bframe An exception need to be made for the case when
|
||||
*we have the last buffer since we need to flush out the on remainig recon.
|
||||
****************************************************************************/
|
||||
|
||||
ps_video_encode_op->s_ive_op.dump_recon = 0;
|
||||
|
||||
if(ps_codec->s_cfg.u4_enable_recon &&
|
||||
((ps_codec->i4_pic_cnt > (WORD32) ps_codec->s_cfg.u4_num_bframes) ||
|
||||
s_inp_buf.s_inp_props.u4_is_last))
|
||||
{
|
||||
/* error status */
|
||||
IH264_ERROR_T ret = IH264_SUCCESS;
|
||||
|
||||
svc_au_buf_t *ps_pic_buf = NULL;
|
||||
|
||||
WORD32 i4_buf_status, i4_curr_poc = 32768;
|
||||
|
||||
/* In case of skips we return recon, but indicate that buffer is zero size
|
||||
*/
|
||||
if(ps_codec->s_rate_control.post_encode_skip[ctxt_sel] || i4_rc_pre_enc_skip)
|
||||
{
|
||||
ps_video_encode_op->s_ive_op.dump_recon = 1;
|
||||
ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[0] = 0;
|
||||
ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[1] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
|
||||
{
|
||||
if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) continue;
|
||||
|
||||
i4_buf_status = ih264_buf_mgr_get_status(
|
||||
ps_codec->pv_ref_buf_mgr, ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
|
||||
|
||||
if((i4_buf_status & BUF_MGR_IO) && (ps_codec->as_ref_set[i].i4_poc < i4_curr_poc))
|
||||
{
|
||||
ps_pic_buf = ps_codec->as_ref_set[i].ps_pic_buf;
|
||||
i4_curr_poc = ps_codec->as_ref_set[i].i4_poc;
|
||||
}
|
||||
}
|
||||
|
||||
ps_video_encode_op->s_ive_op.s_recon_buf = ps_video_encode_ip->s_ive_ip.s_recon_buf;
|
||||
|
||||
/*
|
||||
* If we get a valid buffer. output and free recon.
|
||||
*
|
||||
* we may get an invalid buffer if num_b_frames is 0. This is because
|
||||
* We assume that there will be a ref frame in ref list after encoding
|
||||
* the last frame. With B frames this is correct since its forward ref
|
||||
* pic will be in the ref list. But if num_b_frames is 0, we will not
|
||||
* have a forward ref pic
|
||||
*/
|
||||
|
||||
if(ps_pic_buf)
|
||||
{
|
||||
if((ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[Y] !=
|
||||
ps_codec->s_cfg.u4_disp_wd) ||
|
||||
(ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_ht[Y] !=
|
||||
ps_codec->s_cfg.u4_disp_ht))
|
||||
{
|
||||
SET_ERROR_ON_RETURN(IH264E_NO_FREE_RECONBUF, IVE_FATALERROR,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
}
|
||||
|
||||
isvce_fmt_conv(ps_codec, ps_pic_buf,
|
||||
ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0],
|
||||
ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1],
|
||||
ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2],
|
||||
ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0],
|
||||
ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1], 0,
|
||||
ps_codec->s_cfg.u4_disp_ht);
|
||||
|
||||
ps_video_encode_op->s_ive_op.dump_recon = 1;
|
||||
|
||||
ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_pic_buf->i4_buf_id,
|
||||
BUF_MGR_IO);
|
||||
|
||||
if(IH264_SUCCESS != ret)
|
||||
{
|
||||
SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
* Free reference buffers:
|
||||
* In case of a post enc skip, we have to ensure that those pics will not
|
||||
* be used as reference anymore. In all other cases we will not even mark
|
||||
* the ref buffers
|
||||
***************************************************************************/
|
||||
if(ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
|
||||
{
|
||||
/* pic info */
|
||||
svc_au_buf_t *ps_cur_pic;
|
||||
|
||||
/* mv info */
|
||||
svc_au_data_t *ps_cur_mv_buf;
|
||||
|
||||
/* error status */
|
||||
IH264_ERROR_T ret = IH264_SUCCESS;
|
||||
|
||||
/* Decrement coded pic count */
|
||||
ps_codec->i4_poc--;
|
||||
|
||||
/* loop through to get the min pic cnt among the list of pics stored in ref
|
||||
* list */
|
||||
/* since the skipped frame may not be on reference list, we may not have an
|
||||
* MV bank hence free only if we have allocated */
|
||||
for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
|
||||
{
|
||||
if(ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt)
|
||||
{
|
||||
ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf;
|
||||
|
||||
ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_svc_au_data;
|
||||
|
||||
/* release this frame from reference list and recon list */
|
||||
ret = ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr,
|
||||
ps_cur_mv_buf->i4_buf_id, BUF_MGR_REF);
|
||||
ret |= ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr,
|
||||
ps_cur_mv_buf->i4_buf_id, BUF_MGR_IO);
|
||||
SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
|
||||
ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id,
|
||||
BUF_MGR_REF);
|
||||
ret |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id,
|
||||
BUF_MGR_IO);
|
||||
SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Since recon is not in sync with output, ie there can be frame to be
|
||||
* given back as recon even after last output. Hence we need to mark that
|
||||
* the output is not the last.
|
||||
* Hence search through reflist and mark appropriately
|
||||
*/
|
||||
if(ps_codec->s_cfg.u4_enable_recon)
|
||||
{
|
||||
WORD32 i4_buf_status = 0;
|
||||
|
||||
for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
|
||||
{
|
||||
if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) continue;
|
||||
|
||||
i4_buf_status |= ih264_buf_mgr_get_status(
|
||||
ps_codec->pv_ref_buf_mgr, ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
|
||||
}
|
||||
|
||||
if(i4_buf_status & BUF_MGR_IO)
|
||||
{
|
||||
s_out_buf.u4_is_last = 0;
|
||||
ps_video_encode_op->s_ive_op.u4_is_last = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
* Signaling to APP
|
||||
* 1) If we valid a valid output mark it so
|
||||
* 2) Set the codec output ps_video_encode_op
|
||||
* 3) Set the error status
|
||||
* 4) Set the return Pic type
|
||||
* Note that we already has marked recon properly
|
||||
* 5)Send the consumed input back to app so that it can free it if possible
|
||||
*
|
||||
* We will have to return the output and input buffers unconditionally
|
||||
* so that app can release them
|
||||
**************************************************************************/
|
||||
if(!i4_rc_pre_enc_skip && !ps_codec->s_rate_control.post_encode_skip[ctxt_sel] &&
|
||||
s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0])
|
||||
{
|
||||
/* receive output back from codec */
|
||||
s_out_buf = ps_codec->as_out_buf[ctxt_sel];
|
||||
|
||||
/* send the output to app */
|
||||
ps_video_encode_op->s_ive_op.output_present = 1;
|
||||
ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
|
||||
|
||||
/* Set the time stamps of the encodec input */
|
||||
ps_video_encode_op->s_ive_op.u4_timestamp_low = s_inp_buf.s_inp_props.u4_timestamp_low;
|
||||
ps_video_encode_op->s_ive_op.u4_timestamp_high = s_inp_buf.s_inp_props.u4_timestamp_high;
|
||||
|
||||
switch(ps_codec->pic_type)
|
||||
{
|
||||
case PIC_IDR:
|
||||
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_IDR_FRAME;
|
||||
break;
|
||||
|
||||
case PIC_I:
|
||||
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_I_FRAME;
|
||||
break;
|
||||
|
||||
case PIC_P:
|
||||
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME;
|
||||
break;
|
||||
|
||||
case PIC_B:
|
||||
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_B_FRAME;
|
||||
break;
|
||||
|
||||
default:
|
||||
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
|
||||
break;
|
||||
}
|
||||
|
||||
for(i = 0; i < (WORD32) ps_codec->s_cfg.u4_num_cores; i++)
|
||||
{
|
||||
error_status = ps_codec->as_process[ctxt_sel + i].i4_error_code;
|
||||
SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
|
||||
ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* receive output back from codec */
|
||||
s_out_buf = ps_codec->as_out_buf[ctxt_sel];
|
||||
|
||||
ps_video_encode_op->s_ive_op.output_present = 0;
|
||||
ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
|
||||
|
||||
/* Set the time stamps of the encodec input */
|
||||
ps_video_encode_op->s_ive_op.u4_timestamp_low = 0;
|
||||
ps_video_encode_op->s_ive_op.u4_timestamp_high = 0;
|
||||
|
||||
ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_inp_props.s_raw_buf;
|
||||
|
||||
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
|
||||
}
|
||||
|
||||
/* Send the input to encoder so that it can free it if possible */
|
||||
ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].as_bits_buf[0];
|
||||
|
||||
for(i = 1; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
|
||||
{
|
||||
memmove(((UWORD8 *) ps_video_encode_op->s_ive_op.s_out_buf.pv_buf +
|
||||
ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes),
|
||||
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].pv_buf,
|
||||
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes);
|
||||
|
||||
ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes +=
|
||||
ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes;
|
||||
}
|
||||
|
||||
if(ps_codec->s_cfg.b_nalu_info_export_enable && !i4_rc_pre_enc_skip &&
|
||||
!ps_codec->s_rate_control.post_encode_skip[ctxt_sel] &&
|
||||
s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0])
|
||||
{
|
||||
ps_video_encode_op->b_is_nalu_info_present = true;
|
||||
|
||||
for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
|
||||
{
|
||||
isvce_nalu_info_csv_translator(&ps_codec->as_nalu_descriptors[i],
|
||||
&ps_video_encode_ip->ps_nalu_info_buf[i]);
|
||||
|
||||
ps_video_encode_op->ps_nalu_info_buf[i] = ps_video_encode_ip->ps_nalu_info_buf[i];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_video_encode_op->b_is_nalu_info_present = false;
|
||||
}
|
||||
|
||||
ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_inp_props.s_raw_buf;
|
||||
|
||||
return IV_SUCCESS;
|
||||
}
|
||||
41
encoder/svc/isvce_encode.h
Normal file
41
encoder/svc/isvce_encode.h
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_encode.h
|
||||
*
|
||||
* @brief
|
||||
* Contains functions for encode API
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_ENCODE_H_
|
||||
#define _ISVCE_ENCODE_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
|
||||
/* Synchronous encode API entry point. The definition uses pv_api_ip as an */
/* isvce_video_encode_ip_t and pv_api_op as an isvce_video_encode_op_t, and */
/* returns IV_SUCCESS when the call completes without error. */
extern WORD32 isvce_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op);
|
||||
|
||||
#endif
|
||||
2127
encoder/svc/isvce_encode_header.c
Normal file
2127
encoder/svc/isvce_encode_header.c
Normal file
File diff suppressed because it is too large
Load diff
296
encoder/svc/isvce_encode_header.h
Normal file
296
encoder/svc/isvce_encode_header.h
Normal file
|
|
@ -0,0 +1,296 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file
|
||||
* isvce_encode_header.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains structures and interface prototypes for h264 bitstream
|
||||
* header encoding
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_ENCODE_HEADER_H_
|
||||
#define _ISVCE_ENCODE_HEADER_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
|
||||
/* Dependencies of ih264e_bitstream.h */
|
||||
#include "ih264e_error.h"
|
||||
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "ih264e_trace.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
/**
******************************************************************************
*  @brief   Macro to put a code with specified number of bits into the
*           bitstream
*
*  @par Description
*   Traces the syntax element through ENTROPY_TRACE, writes code_val using
*   code_len bits with ih264e_put_bits() and stores the status in ret_val.
*   If the status is not IH264E_SUCCESS, the macro executes
*   'return ret_val;', i.e. it returns from the ENCLOSING function. It must
*   hence only be used in functions whose return type can hold ret_val.
*
*  @remarks
*   - ret_val must be a modifiable lvalue
*   - code_val is passed to both ENTROPY_TRACE and ih264e_put_bits(), so it
*     may be evaluated more than once; do not pass expressions with side
*     effects
*   - the macro expands to a brace-enclosed block, not to a single
*     expression statement
******************************************************************************
*/
#define PUT_BITS(ps_bitstrm, code_val, code_len, ret_val, syntax_string)   \
    {                                                                      \
        ENTROPY_TRACE(syntax_string, code_val);                            \
        (ret_val) = ih264e_put_bits((ps_bitstrm), (code_val), (code_len)); \
        if((ret_val) != IH264E_SUCCESS)                                    \
        {                                                                  \
            return (ret_val);                                              \
        }                                                                  \
    }
|
||||
|
||||
/**
******************************************************************************
*  @brief   Macro to put a code into the bitstream using 0th order
*           exponential Golomb encoding for unsigned numbers
*
*  @par Description
*   Traces the syntax element through ENTROPY_TRACE, writes code_val with
*   ih264e_put_uev() and stores the status in ret_val. If the status is not
*   IH264E_SUCCESS, the macro executes 'return ret_val;', i.e. it returns
*   from the ENCLOSING function.
*
*  @remarks
*   - ret_val must be a modifiable lvalue
*   - code_val is passed to both ENTROPY_TRACE and ih264e_put_uev(), so it
*     may be evaluated more than once; do not pass expressions with side
*     effects
*   - the macro expands to a brace-enclosed block, not to a single
*     expression statement
******************************************************************************
*/
#define PUT_BITS_UEV(ps_bitstrm, code_val, ret_val, syntax_string) \
    {                                                              \
        ENTROPY_TRACE(syntax_string, code_val);                    \
        (ret_val) = ih264e_put_uev((ps_bitstrm), (code_val));      \
        if((ret_val) != IH264E_SUCCESS)                            \
        {                                                          \
            return (ret_val);                                      \
        }                                                          \
    }
|
||||
/**
******************************************************************************
 *  @brief      Macro to put a code into the bitstream using 0th order
 *              exponential Golomb encoding for signed numbers
 *
 *  @par Description
 *   Traces (syntax_string, code_val) through ENTROPY_TRACE, calls
 *   ih264e_put_sev(ps_bitstrm, code_val) and stores the result in ret_val.
 *
 *  @remarks
 *   Contains a hidden 'return': when the result is not IH264E_SUCCESS the
 *   ENCLOSING function returns ret_val. ret_val must be a modifiable lvalue
 *   and is evaluated more than once.
******************************************************************************
 */
#define PUT_BITS_SEV(ps_bitstrm, code_val, ret_val, syntax_string) \
    { \
        ENTROPY_TRACE(syntax_string, code_val); \
        (ret_val) = ih264e_put_sev((ps_bitstrm), (code_val)); \
        if((ret_val) != IH264E_SUCCESS) \
        { \
            return (ret_val); \
        } \
    }
|
||||
|
||||
/**
******************************************************************************
 *  @brief      Macro to set active entropy threads to zero and return
 *              in case of errors
 *
 *  @par Description
 *   When ps_entropy->i4_error_code is not IH264E_SUCCESS, issues DATA_SYNC(),
 *   clears ps_codec->au4_entropy_thread_active[ctxt_sel] and returns the
 *   error code from the ENCLOSING function. Does nothing otherwise.
 *
 *  @remarks
 *   Expands to a bare 'if' statement (kept that way so existing call sites
 *   are unaffected): do not follow an invocation with 'else'. Pointer
 *   arguments are parenthesised so expressions such as '&s_entropy' expand
 *   correctly; ps_entropy is evaluated more than once.
******************************************************************************
 */
#define RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel) \
    if((ps_entropy)->i4_error_code != IH264E_SUCCESS) \
    { \
        DATA_SYNC(); \
        (ps_codec)->au4_entropy_thread_active[(ctxt_sel)] = 0; \
        return (ps_entropy)->i4_error_code; \
    }
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Extern Function Declarations */
|
||||
/*****************************************************************************/
|
||||
extern WORD32 ih264e_generate_nal_unit_header(bitstrm_t *ps_bitstrm, WORD32 nal_unit_type,
|
||||
WORD32 nal_ref_idc);
|
||||
|
||||
extern WORD32 ih264e_generate_vui(bitstrm_t *ps_bitstrm, vui_t *ps_vui);
|
||||
|
||||
extern IH264E_ERROR_T ih264e_generate_sei(bitstrm_t *ps_bitstrm, sei_params_t *ps_sei,
|
||||
UWORD32 u4_insert_per_idr);
|
||||
|
||||
extern IH264E_ERROR_T ih264e_add_filler_nal_unit(bitstrm_t *ps_bitstrm, WORD32 insert_fill_bytes);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Generates SPS (Sequence Parameter Set)
|
||||
*
|
||||
* @par Description
|
||||
* This function generates Sequence Parameter Set header as per the spec
|
||||
*
|
||||
* @param[in] ps_bitstrm
|
||||
* pointer to bitstream context (handle)
|
||||
*
|
||||
* @param[in] ps_sps
|
||||
* pointer to structure containing SPS data
|
||||
*
|
||||
* @return success or failure error code
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps, NAL_UNIT_TYPE_T nal_type);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Generates PPS (Picture Parameter Set)
|
||||
*
|
||||
* @par Description
|
||||
* Generate Picture Parameter Set as per Section 7.3.2.2
|
||||
*
|
||||
* @param[in] ps_bitstrm
|
||||
* pointer to bitstream context (handle)
|
||||
*
|
||||
* @param[in] ps_pps
|
||||
* pointer to structure containing PPS data
|
||||
*
|
||||
* @return success or failure error code
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps, sps_t *ps_sps);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Generates Slice Header
|
||||
*
|
||||
* @par Description
|
||||
* Generate Slice Header as per Section 7.3.3
|
||||
*
|
||||
* @param[inout] ps_bitstrm
|
||||
* pointer to bitstream context for generating slice header
|
||||
*
|
||||
* @param[in] ps_slice_hdr
|
||||
* pointer to slice header params
|
||||
*
|
||||
* @param[in] ps_pps
|
||||
* pointer to pps params referred by slice
|
||||
*
|
||||
* @param[in] ps_sps
|
||||
* pointer to sps params referred by slice
|
||||
*
|
||||
* @param[in] u1_idr_flag
|
||||
* IDR indicator for the slice (presumably non-zero for IDR pictures; confirm
|
||||
* against the definition)
|
||||
*
|
||||
* @return success or failure error code
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_generate_slice_header(bitstrm_t *ps_bitstrm, slice_header_t *ps_slice_hdr,
|
||||
pps_t *ps_pps, sps_t *ps_sps, UWORD8 u1_idr_flag);
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Populates sps structure
|
||||
*
|
||||
* @par Description
|
||||
* Populates sps structure for its use in header generation
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* pointer to encoder context
|
||||
*
|
||||
* @param[out] ps_sps
|
||||
* pointer to sps params that needs to be populated
|
||||
*
|
||||
* @return success or failure error code
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_populate_sps(isvce_codec_t *ps_codec, sps_t *ps_sps, UWORD8 u1_sps_id,
|
||||
UWORD8 u1_profile_idc, isvce_inp_buf_t *ps_inp_buf,
|
||||
UWORD8 u1_spatial_layer_id);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Populates pps structure
|
||||
*
|
||||
* @par Description
|
||||
* Populates pps structure for its use in header generation
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* pointer to encoder context
|
||||
*
|
||||
* @param[out] ps_pps
|
||||
* pointer to pps params that needs to be populated
|
||||
*
|
||||
* @return success or failure error code
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_populate_pps(isvce_codec_t *ps_codec, pps_t *ps_pps, UWORD8 u1_sps_id,
|
||||
UWORD8 u1_pps_id, UWORD8 u1_spatial_layer_id);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Populates slice header structure
|
||||
*
|
||||
* @par Description
|
||||
* Populates slice header structure for its use in header generation
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* pointer to proc context
|
||||
*
|
||||
* @param[out] ps_slice_hdr
|
||||
* pointer to slice header structure that needs to be populated
|
||||
*
|
||||
* @param[in] ps_pps
|
||||
* pointer to pps params structure referred by the slice
|
||||
*
|
||||
* @param[in] ps_sps
|
||||
* pointer to sps params referred by the pps
|
||||
*
|
||||
* @return success or failure error code
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_populate_slice_header(isvce_process_ctxt_t *ps_proc, slice_header_t *ps_slice_hdr,
|
||||
pps_t *ps_pps, sps_t *ps_sps, UWORD8 u1_is_idr);
|
||||
|
||||
extern WORD32 isvce_populate_svc_nalu_extension(isvce_process_ctxt_t *ps_proc,
|
||||
svc_nalu_ext_t *ps_svc_nalu_ext,
|
||||
NAL_UNIT_TYPE_T nalu_type, UWORD8 u1_idr_flag);
|
||||
|
||||
extern WORD32 isvce_generate_svc_nalu_extension(bitstrm_t *ps_bitstrm,
|
||||
svc_nalu_ext_t *ps_svc_nalu_ext, UWORD8 u1_nalu_id);
|
||||
|
||||
extern WORD32 isvce_populate_svc_slice(isvce_process_ctxt_t *ps_proc,
|
||||
svc_slice_header_t *ps_svc_slice_hdr, pps_t *ps_pps,
|
||||
subset_sps_t *ps_subset_sps,
|
||||
svc_nalu_ext_t *ps_svc_nalu_ext);
|
||||
|
||||
extern WORD32 isvce_populate_subset_sps(isvce_codec_t *ps_codec, subset_sps_t *ps_subset_sps,
|
||||
UWORD8 u1_sps_id, isvce_inp_buf_t *ps_inp_buf,
|
||||
UWORD8 u1_spatial_layer_id);
|
||||
|
||||
extern WORD32 isvce_generate_prefix_nal(bitstrm_t *ps_bitstrm, svc_nalu_ext_t *ps_svc_nalu_ext,
|
||||
slice_header_t *ps_slice_header,
|
||||
UWORD8 u1_max_num_ref_frames, UWORD8 u1_num_spatial_layers);
|
||||
|
||||
extern WORD32 isvce_generate_slice_header_svc(bitstrm_t *ps_bitstrm, pps_t *ps_pps,
|
||||
svc_nalu_ext_t *ps_svc_nalu_ext,
|
||||
svc_slice_header_t *ps_svc_slice_hdr,
|
||||
subset_sps_t *ps_subset_sps);
|
||||
|
||||
extern WORD32 isvce_generate_subset_sps(bitstrm_t *ps_bitstrm, subset_sps_t *ps_subset_sps);
|
||||
|
||||
#endif
|
||||
70
encoder/svc/isvce_error.h
Normal file
70
encoder/svc/isvce_error.h
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_error.h
|
||||
*
|
||||
* @brief
|
||||
* SVC specific error codes
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_ERROR_H_
|
||||
#define _ISVCE_ERROR_H_
|
||||
|
||||
#include "ih264e_error.h"
|
||||
|
||||
typedef enum ISVCE_ERRORS_T
|
||||
{
|
||||
/**Invalid SVC params */
|
||||
IH264E_INVALID_SVC_PARAMS = IH264E_CODEC_ERROR_START + 0x100,
|
||||
|
||||
/**Invalid num_temporal_layers */
|
||||
IH264E_INVALID_NUM_TEMPORAL_LAYERS = IH264E_CODEC_ERROR_START + 0x101,
|
||||
|
||||
/**Invalid num_spatial_layers */
|
||||
IH264E_INVALID_NUM_SPATIAL_LAYERS = IH264E_CODEC_ERROR_START + 0x102,
|
||||
|
||||
/**Invalid spatial_res_ratio */
|
||||
IH264E_INVALID_SPATIAL_RES_RATIO = IH264E_CODEC_ERROR_START + 0x103,
|
||||
|
||||
/** Weighted prediction not supported */
|
||||
IH264E_WEIGHTED_PRED_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x104,
|
||||
|
||||
/** CABAC entropy mode not supported for SVC */
|
||||
IH264E_CABAC_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x105,
|
||||
|
||||
/**Invalid input dimensions */
|
||||
IH264E_INVALID_SVC_INPUT_DIMENSIONS = IH264E_CODEC_ERROR_START + 0x106,
|
||||
|
||||
/** Invalid init QP */
|
||||
IH264E_INVALID_DYN_INIT_QP = IH264E_CODEC_ERROR_START + 0x107,
|
||||
|
||||
} ISVCE_ERRORS_T;
|
||||
|
||||
#endif
|
||||
145
encoder/svc/isvce_fmt_conv.c
Normal file
145
encoder/svc/isvce_fmt_conv.c
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_fmt_conv.c
|
||||
*
|
||||
* @brief
|
||||
* Contains functions for format conversion or frame copy of output buffer
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_fmt_conv()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_macros.h"
|
||||
/* Dependencies of ih264_buf_mgr.h */
|
||||
/* Dependencies of ih264_list.h */
|
||||
#include "ih264_error.h"
|
||||
/* Dependencies of ih264_common_tables.h */
|
||||
#include "ih264_defs.h"
|
||||
#include "ih264_structs.h"
|
||||
#include "ih264_buf_mgr.h"
|
||||
#include "ih264_common_tables.h"
|
||||
#include "ih264_list.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "ih264_trans_data.h"
|
||||
#include "ih264_size_defs.h"
|
||||
/* Dependencies of ih264e_cabac_structs.h */
|
||||
#include "ih264_cabac_tables.h"
|
||||
/* Dependencies of ime_structs.h */
|
||||
#include "ime_defs.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
/* Dependencies of ih264e_structs.h */
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "ih264_defs.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "ih264_inter_pred_filters.h"
|
||||
#include "ih264_structs.h"
|
||||
#include "ih264_trans_quant_itrans_iquant.h"
|
||||
/* Dependencies of ih264e_bitstream.h */
|
||||
#include "ih264e_error.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "ih264e_cabac_structs.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "ime_statistics.h"
|
||||
#include "ime_structs.h"
|
||||
/* Dependencies of 'ih264e_utils.h' */
|
||||
#include "ih264e_defs.h"
|
||||
#include "ih264e_structs.h"
|
||||
#include "ih264e_fmt_conv.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
/**
*******************************************************************************
*
* @brief Copies rows of a picture's last spatial layer to destination planes
*
* @par Description
*  Reads the luma (component 0) and chroma (component 1) buffers of layer
*  'u1_num_spatial_layers - 1' of ps_pic and hands num_rows rows, starting at
*  cur_row, to the 420SP->420SP or 420SP->420P conversion helper selected by
*  ps_codec->s_cfg.e_recon_color_fmt. Any other recon colour format copies
*  nothing.
*
* @param[in] ps_codec
*  Codec context; supplies display width and colour formats
*
* @param[in] ps_pic
*  Access unit buffer holding the per-layer YUV buffer properties
*
* @param[out] pu1_y_dst
*  Base of the destination luma plane
*
* @param[out] pu1_u_dst
*  Base of the destination U plane (also used as the interleaved chroma
*  destination for the 420SP formats)
*
* @param[out] pu1_v_dst
*  Base of the destination V plane (used for IV_YUV_420P only)
*
* @param[in] u4_dst_y_strd
*  Destination luma stride
*
* @param[in] u4_dst_uv_strd
*  Destination chroma stride
*
* @param[in] cur_row
*  First luma row to process; chroma starts at cur_row / 2
*
* @param[in] num_rows
*  Number of rows passed to the conversion helper; 0 returns immediately
*
* @return IH264E_SUCCESS in all cases
*
* @remarks
*  The source strides passed to the helpers are the strides of the layer
*  buffers themselves, i.e. the same strides used to locate the first source
*  row, so addressing stays consistent even if they differ from
*  ps_codec->i4_rec_strd.
*
*******************************************************************************
*/
IH264E_ERROR_T isvce_fmt_conv(isvce_codec_t *ps_codec, svc_au_buf_t *ps_pic, UWORD8 *pu1_y_dst,
                              UWORD8 *pu1_u_dst, UWORD8 *pu1_v_dst, UWORD32 u4_dst_y_strd,
                              UWORD32 u4_dst_uv_strd, WORD32 cur_row, WORD32 num_rows)
{
    IH264E_ERROR_T ret = IH264E_SUCCESS;

    UWORD8 *pu1_luma, *pu1_chroma;
    UWORD8 *pu1_y_src, *pu1_uv_src;
    UWORD8 *pu1_y_dst_row, *pu1_u_dst_row, *pu1_v_dst_row;
    WORD32 src_y_strd, src_uv_strd;
    WORD32 wd, is_u_first;

    WORD32 layer_id = ps_pic->u1_num_spatial_layers - 1;

    /* Nothing to copy */
    if(0 == num_rows)
    {
        return ret;
    }

    pu1_luma = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[0].pv_data;
    pu1_chroma = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[1].pv_data;

    src_y_strd = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[0].i4_data_stride;
    src_uv_strd = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[1].i4_data_stride;

    wd = ps_codec->s_cfg.u4_disp_wd;
    is_u_first = (IV_YUV_420SP_UV == ps_codec->e_codec_color_format) ? 1 : 0;

    /* First source row of each plane; one chroma row serves two luma rows */
    pu1_y_src = pu1_luma + cur_row * src_y_strd;
    pu1_uv_src = pu1_chroma + (cur_row / 2) * src_uv_strd;

    /* Matching destination rows */
    pu1_y_dst_row = pu1_y_dst + cur_row * u4_dst_y_strd;
    pu1_u_dst_row = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
    pu1_v_dst_row = pu1_v_dst + (cur_row / 2) * u4_dst_uv_strd;

    if((IV_YUV_420SP_UV == ps_codec->s_cfg.e_recon_color_fmt) ||
       (IV_YUV_420SP_VU == ps_codec->s_cfg.e_recon_color_fmt))
    {
        /* Semi-planar output: interleaved chroma goes to the U destination. */
        /* NOTE(review): both UV and VU orders take this same path; no chroma */
        /* swap is requested here - confirm that is intended for 420SP_VU.    */
        ih264e_fmt_conv_420sp_to_420sp(pu1_y_src, pu1_uv_src, pu1_y_dst_row, pu1_u_dst_row, wd,
                                       num_rows, src_y_strd, src_uv_strd, u4_dst_y_strd,
                                       u4_dst_uv_strd);
    }
    else if(IV_YUV_420P == ps_codec->s_cfg.e_recon_color_fmt)
    {
        /* Planar output: de-interleave chroma into separate U and V planes; */
        /* the trailing 0 is passed unchanged from the original call         */
        ih264e_fmt_conv_420sp_to_420p(pu1_y_src, pu1_uv_src, pu1_y_dst_row, pu1_u_dst_row,
                                      pu1_v_dst_row, wd, num_rows, src_y_strd, src_uv_strd,
                                      u4_dst_y_strd, u4_dst_uv_strd, is_u_first, 0);
    }

    return (ret);
}
|
||||
48
encoder/svc/isvce_fmt_conv.h
Normal file
48
encoder/svc/isvce_fmt_conv.h
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_fmt_conv.h
|
||||
*
|
||||
* @brief
|
||||
* The file contains extern declarations of color space conversion routines
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_FMT_CONV_H_
|
||||
#define _ISVCE_FMT_CONV_H_
|
||||
|
||||
#include "ih264e_fmt_conv.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
IH264E_ERROR_T isvce_fmt_conv(isvce_codec_t *ps_codec, svc_au_buf_t *ps_pic, UWORD8 *pu1_y_dst,
|
||||
UWORD8 *pu1_u_dst, UWORD8 *pu1_v_dst, UWORD32 u4_dst_y_strd,
|
||||
UWORD32 u4_dst_uv_strd, WORD32 cur_row, WORD32 num_rows);
|
||||
|
||||
#endif
|
||||
314
encoder/svc/isvce_function_selector_generic.c
Normal file
314
encoder/svc/isvce_function_selector_generic.c
Normal file
|
|
@ -0,0 +1,314 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_function_selector_generic.c
|
||||
*
|
||||
* @brief
|
||||
* Contains functions to initialize function pointers of codec context
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_init_function_ptr_generic
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* System Include files */
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* User Include files */
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "ih264_size_defs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "ih264e_error.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
#include "ime_defs.h"
|
||||
#include "ime_structs.h"
|
||||
#include "ih264_error.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
#include "ih264_inter_pred_filters.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
#include "ih264_padding.h"
|
||||
#include "ih264_intra_pred_filters.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "isvce_rate_control.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "ih264e_platform_macros.h"
|
||||
#include "isvce_cabac.h"
|
||||
#include "isvce_core_coding.h"
|
||||
#include "ih264_cavlc_tables.h"
|
||||
#include "isvce_cavlc.h"
|
||||
#include "ih264e_intra_modes_eval.h"
|
||||
#include "ih264e_fmt_conv.h"
|
||||
#include "ih264e_half_pel.h"
|
||||
#include "isvce_me.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Definitions */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Initialize the intra/inter/transform/deblk function pointers of
|
||||
* codec context
|
||||
*
|
||||
* @par Description: the current routine initializes the function pointers of
|
||||
* codec context basing on the architecture in use
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Codec context pointer
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec)
|
||||
{
|
||||
WORD32 i = 0;
|
||||
|
||||
/* curr proc ctxt */
|
||||
isvce_process_ctxt_t *ps_proc = NULL;
|
||||
isvce_me_ctxt_t *ps_me_ctxt = NULL;
|
||||
isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
|
||||
enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
|
||||
inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
|
||||
mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
|
||||
|
||||
/* Init function pointers for intra pred leaf level functions luma
|
||||
* Intra 16x16 */
|
||||
ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert;
|
||||
ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz;
|
||||
ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc;
|
||||
ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane;
|
||||
|
||||
/* Init function pointers for intra pred leaf level functions luma
|
||||
* Intra 4x4 */
|
||||
ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert;
|
||||
ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz;
|
||||
ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc;
|
||||
ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl;
|
||||
ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr;
|
||||
ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r;
|
||||
ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d;
|
||||
ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l;
|
||||
ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u;
|
||||
|
||||
/* Init function pointers for intra pred leaf level functions luma
|
||||
* Intra 8x8 */
|
||||
ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert;
|
||||
ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc;
|
||||
ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl;
|
||||
ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr;
|
||||
ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r;
|
||||
ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d;
|
||||
ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l;
|
||||
ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u;
|
||||
|
||||
/* Init function pointers for intra pred leaf level functions chroma
|
||||
* Intra 8x8 */
|
||||
ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc;
|
||||
ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz;
|
||||
ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert;
|
||||
ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane;
|
||||
|
||||
/* Init luma forward transform fn ptr */
|
||||
ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_8x8) /
|
||||
sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0])) ==
|
||||
NUM_RESI_TRANS_QUANT_VARIANTS);
|
||||
ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_4x4) /
|
||||
sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0])) ==
|
||||
NUM_RESI_TRANS_QUANT_VARIANTS);
|
||||
ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4) /
|
||||
sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0])) ==
|
||||
NUM_RESI_TRANS_QUANT_VARIANTS);
|
||||
|
||||
ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8;
|
||||
ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4;
|
||||
ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4;
|
||||
ps_enc_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8;
|
||||
ps_enc_loop_fxns->apf_resi_trans_quant_4x4[1] = isvc_resi_trans_quant_4x4;
|
||||
ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] = isvc_resi_trans_quant_chroma_4x4;
|
||||
ps_enc_loop_fxns->pf_hadamard_quant_4x4 = isvc_hadamard_quant_4x4;
|
||||
ps_enc_loop_fxns->pf_hadamard_quant_2x2_uv = isvc_hadamard_quant_2x2_uv;
|
||||
|
||||
/* Init inverse transform fn ptr */
|
||||
ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8) /
|
||||
sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0])) == NUM_IQ_IT_RECON_VARIANTS);
|
||||
ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4) /
|
||||
sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0])) == NUM_IQ_IT_RECON_VARIANTS);
|
||||
ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc) /
|
||||
sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0])) ==
|
||||
NUM_IQ_IT_RECON_VARIANTS);
|
||||
ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4) /
|
||||
sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0])) ==
|
||||
NUM_IQ_IT_RECON_VARIANTS);
|
||||
ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc) /
|
||||
sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0])) ==
|
||||
NUM_IQ_IT_RECON_VARIANTS);
|
||||
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] = isvc_iquant_itrans_recon_4x4;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] = isvc_iquant_itrans_recon_4x4_dc;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] = isvc_iquant_itrans_recon_chroma_4x4;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
|
||||
isvc_iquant_itrans_recon_chroma_4x4_dc;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] = isvc_iquant_itrans_recon_4x4;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] = isvc_iquant_itrans_recon_4x4_dc;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] = isvc_iquant_itrans_recon_chroma_4x4;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
|
||||
isvc_iquant_itrans_recon_chroma_4x4_dc;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] = isvc_iquant_itrans_recon_chroma_4x4;
|
||||
ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
|
||||
isvc_iquant_itrans_recon_chroma_4x4_dc;
|
||||
ps_enc_loop_fxns->pf_zcbf_iquant_itrans_recon_4x4 = isvc_zcbf_iquant_itrans_recon_4x4;
|
||||
ps_enc_loop_fxns->pf_chroma_zcbf_iquant_itrans_recon_4x4 =
|
||||
isvc_chroma_zcbf_iquant_itrans_recon_4x4;
|
||||
|
||||
ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4;
|
||||
ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv;
|
||||
|
||||
/* Init fn ptr luma core coding */
|
||||
ps_enc_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16;
|
||||
ps_enc_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4;
|
||||
ps_enc_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16;
|
||||
|
||||
/* Init fn ptr chroma core coding */
|
||||
ps_enc_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8;
|
||||
ps_enc_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;
|
||||
|
||||
/* Init fn ptr luma deblocking */
|
||||
ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4;
|
||||
ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4;
|
||||
ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4;
|
||||
ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4;
|
||||
|
||||
/* Init fn ptr chroma deblocking */
|
||||
ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4;
|
||||
ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4;
|
||||
ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4;
|
||||
ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4;
|
||||
|
||||
/* write mb syntax layer */
|
||||
ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc;
|
||||
ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc;
|
||||
ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc;
|
||||
ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac;
|
||||
ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac;
|
||||
ps_codec->pf_write_mb_syntax_layer[CABAC][BSLICE] = isvce_write_bslice_mb_cabac;
|
||||
|
||||
/* Padding Functions */
|
||||
ps_codec->pf_pad_top = ih264_pad_top;
|
||||
ps_codec->pf_pad_bottom = ih264_pad_bottom;
|
||||
ps_codec->pf_pad_left_luma = ih264_pad_left_luma;
|
||||
ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma;
|
||||
ps_codec->pf_pad_right_luma = ih264_pad_right_luma;
|
||||
ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma;
|
||||
|
||||
/* Inter pred leaf level functions */
|
||||
ps_inter_pred_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy;
|
||||
ps_inter_pred_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz;
|
||||
ps_inter_pred_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert;
|
||||
ps_inter_pred_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
|
||||
ps_inter_pred_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma;
|
||||
|
||||
/* sad me level functions */
|
||||
ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16;
|
||||
ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
|
||||
ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8;
|
||||
|
||||
/* memory handling operations */
|
||||
ps_mem_fxns->pf_mem_cpy = ih264_memcpy;
|
||||
ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8;
|
||||
ps_mem_fxns->pf_mem_set = ih264_memset;
|
||||
ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8;
|
||||
ps_mem_fxns->pf_copy_2d = isvc_copy_2d;
|
||||
ps_mem_fxns->pf_memset_2d = isvc_memset_2d;
|
||||
ps_mem_fxns->pf_16bit_interleaved_copy = isvc_16bit_interleaved_copy;
|
||||
ps_mem_fxns->pf_16bit_interleaved_memset = isvc_16bit_interleaved_memset;
|
||||
ps_mem_fxns->pf_nonzero_checker = isvc_is_nonzero_blk;
|
||||
|
||||
/* sad me level functions */
|
||||
for(i = 0; i < (MAX_PROCESS_CTXT); i++)
|
||||
{
|
||||
ps_proc = &ps_codec->as_process[i];
|
||||
|
||||
ps_me_ctxt = &ps_proc->s_me_ctxt;
|
||||
ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16;
|
||||
ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
|
||||
ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8;
|
||||
ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog;
|
||||
ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog;
|
||||
ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog;
|
||||
ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16;
|
||||
ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter;
|
||||
}
|
||||
|
||||
/* intra mode eval -encoder level function */
|
||||
ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes;
|
||||
ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes;
|
||||
ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
|
||||
|
||||
/* csc */
|
||||
ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
|
||||
ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
|
||||
|
||||
/* Halp pel generation function - encoder level*/
|
||||
ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz;
|
||||
ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert;
|
||||
|
||||
/* ME compute */
|
||||
ps_codec->apf_compute_me[PSLICE] = &isvce_compute_me_single_reflist;
|
||||
ps_codec->apf_compute_me[BSLICE] = &isvce_compute_me_multi_reflist;
|
||||
|
||||
/* skip decision */
|
||||
ps_codec->apf_find_skip_params_me[PSLICE] = &isvce_find_pskip_params_me;
|
||||
ps_codec->apf_find_skip_params_me[BSLICE] = &isvce_find_bskip_params_me;
|
||||
}
|
||||
48
encoder/svc/isvce_globals.c
Normal file
48
encoder/svc/isvce_globals.c
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_globals.c
|
||||
*
|
||||
* @brief
|
||||
* Contains definitions of global variables used across the encoder
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @par List of functions
|
||||
*
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_defs.h"
|
||||
|
||||
/* Raster to z scan map */
|
||||
const UWORD8 gau1_raster_to_zscan_map[MAX_TU_IN_MB] = {0, 1, 4, 5, 2, 3, 6, 7,
|
||||
8, 9, 12, 13, 10, 11, 14, 15};
|
||||
44
encoder/svc/isvce_globals.h
Normal file
44
encoder/svc/isvce_globals.h
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_globals.h
|
||||
*
|
||||
* @brief
|
||||
* Contains declarations of global variables for H264 encoder
|
||||
*
|
||||
* @author
|
||||
* Ittiam
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_GLOBALS_H_
|
||||
#define _ISVCE_GLOBALS_H_
|
||||
|
||||
#include "ih264e_globals.h"
|
||||
|
||||
/* Raster to z scan map; the definition lives in isvce_globals.c */
extern const UWORD8 gau1_raster_to_zscan_map[MAX_TU_IN_MB];
|
||||
|
||||
#endif
|
||||
1378
encoder/svc/isvce_ibl_eval.c
Normal file
1378
encoder/svc/isvce_ibl_eval.c
Normal file
File diff suppressed because it is too large
Load diff
105
encoder/svc/isvce_ibl_eval.h
Normal file
105
encoder/svc/isvce_ibl_eval.h
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_ibl_eval.h
|
||||
*
|
||||
* @brief
|
||||
* Contains declarations of the functions defined in
|
||||
* isvce_ibl_eval.c
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#ifndef _ISVCE_IBL_EVAL_H_
|
||||
#define _ISVCE_IBL_EVAL_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_intra_resample.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
/* Sizes of the temporary luma / Cb / Cr buffers */
/* NOTE(review): the Cb size is derived from REF_ARRAY_WIDTH while the Cr size */
/* is derived from DYADIC_REF_W_C and DYADIC_REF_H_C - confirm the asymmetry is intended */
#define TEMP_BUF_SIZE_LUMA (REF_ARRAY_WIDTH * REF_ARRAY_WIDTH)
#define TEMP_BUF_SIZE_CB (REF_ARRAY_WIDTH * REF_ARRAY_WIDTH)
#define TEMP_BUF_SIZE_CR (DYADIC_REF_W_C * DYADIC_REF_H_C)

/* Dimensions of the intermediate buffer; the height is 4 more than MB_SIZE */
#define INTERMEDIATE_BUFF_WIDTH 48
#define INTERMEDIATE_BUFF_HEIGHT (MB_SIZE + 4)

/* Size of the temporary interpolation buffer (width x height of the above) */
#define TEMP_INTERPOLATION_BUF_SIZE (INTERMEDIATE_BUFF_WIDTH * INTERMEDIATE_BUFF_HEIGHT)
|
||||
|
||||
/* Structs */
|
||||
/* 'Constants' member of svc_intra_pred_ctxt_t */
typedef struct intra_pred_constants_t
{
    /* Opaque pointer to the module's private state */
    /* NOTE(review): presumably an intra_pred_state_t from */
    /* isvce_ibl_private_defs.h - confirm against isvce_ibl_eval.c */
    void *pv_state;
} intra_pred_constants_t;
|
||||
|
||||
typedef struct intra_pred_outputs_t
|
||||
{
|
||||
yuv_buf_props_t s_pred_buf;
|
||||
} intra_pred_outputs_t;
|
||||
|
||||
typedef struct intra_pred_variables_t
|
||||
{
|
||||
svc_ilp_data_t *ps_svc_ilp_data;
|
||||
|
||||
coordinates_t s_mb_pos;
|
||||
|
||||
UWORD8 u1_spatial_layer_id;
|
||||
} intra_pred_variables_t;
|
||||
|
||||
typedef struct svc_intra_pred_ctxt_t
|
||||
{
|
||||
intra_pred_constants_t s_intra_pred_constants;
|
||||
|
||||
intra_pred_variables_t s_intra_pred_variables;
|
||||
|
||||
intra_pred_outputs_t s_intra_pred_outputs;
|
||||
|
||||
} svc_intra_pred_ctxt_t;
|
||||
|
||||
extern UWORD32 isvce_get_svc_intra_pred_ctxt_size(UWORD8 u1_num_spatial_layers,
|
||||
DOUBLE d_spatial_res_ratio, UWORD32 u4_wd,
|
||||
UWORD32 u4_ht);
|
||||
|
||||
extern void isvce_intra_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
|
||||
|
||||
extern void isvce_update_ibl_info(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt,
|
||||
UWORD8 u1_num_spatial_layers, UWORD8 u1_spatial_layer_id,
|
||||
UWORD16 u2_mb_type, WORD32 i4_mb_x, WORD32 i4_mb_y,
|
||||
WORD8 u1_base_mode_flag);
|
||||
|
||||
extern void isvce_evaluate_IBL_mode(isvce_process_ctxt_t *ps_proc);
|
||||
|
||||
extern void isvce_pad_mb_mode_buf(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt,
|
||||
UWORD8 u1_spatial_layer_id, UWORD8 u1_num_spatial_layers,
|
||||
DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht);
|
||||
|
||||
#endif
|
||||
94
encoder/svc/isvce_ibl_private_defs.h
Normal file
94
encoder/svc/isvce_ibl_private_defs.h
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_ibl_private_defs.h
|
||||
*
|
||||
* @brief
|
||||
* Contains datatype and macro definitions used exclusively in
|
||||
* inter-layer intra prediction (IBL evaluation)
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_IBL_PRIVATE_DEFS_H_
|
||||
#define _ISVCE_IBL_PRIVATE_DEFS_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvc_intra_resample.h"
|
||||
|
||||
/* Structs */
|
||||
typedef struct intra_pred_mb_state_t
|
||||
{
|
||||
coordinates_t s_offsets;
|
||||
|
||||
coordinates_t s_ref_array_dims;
|
||||
|
||||
WORD32 *pi4_ref_array_positions_x;
|
||||
|
||||
WORD32 *pi4_ref_array_positions_y;
|
||||
|
||||
coordinates_t *ps_ref_array_phases;
|
||||
|
||||
coordinates_t s_min_pos;
|
||||
|
||||
coordinates_t s_max_pos;
|
||||
|
||||
} intra_pred_mb_state_t;
|
||||
|
||||
typedef struct intra_pred_layer_state_t
|
||||
{
|
||||
layer_resampler_props_t *ps_luma_props;
|
||||
|
||||
layer_resampler_props_t *ps_chroma_props;
|
||||
|
||||
intra_pred_mb_state_t *ps_luma_mb_states;
|
||||
|
||||
intra_pred_mb_state_t *ps_chroma_mb_states;
|
||||
|
||||
WORD8 *pi1_mb_mode;
|
||||
|
||||
WORD32 i4_mb_mode_stride;
|
||||
|
||||
/* buffer to store the reference
|
||||
layer data before intra sampling */
|
||||
UWORD8 *pu1_refarray_buffer;
|
||||
|
||||
UWORD8 *pu1_refarray_cb;
|
||||
|
||||
UWORD8 *pu1_refarray_cr;
|
||||
|
||||
WORD32 *pi4_temp_interpolation_buffer;
|
||||
|
||||
} intra_pred_layer_state_t;
|
||||
|
||||
typedef struct intra_pred_state_t
|
||||
{
|
||||
/* Array of size numSpatialLayers */
|
||||
intra_pred_layer_state_t *ps_layer_state;
|
||||
|
||||
} intra_pred_state_t;
|
||||
|
||||
#endif
|
||||
737
encoder/svc/isvce_ilp_mv.c
Normal file
737
encoder/svc/isvce_ilp_mv.c
Normal file
|
|
@ -0,0 +1,737 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_ilp_mv.c
|
||||
*
|
||||
* @brief
|
||||
* Contains functions used for deriving inter_layer MV's
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvce_ilp_mv_private_defs.h"
|
||||
#include "isvce_ilp_mv.h"
|
||||
#include "isvce_ilp_mv_utils.h"
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Returns size of buffers for storing ILP MV ctxt
|
||||
*
|
||||
* @param[in] u1_num_spatial_layers
|
||||
* Num Spatial Layers
|
||||
*
|
||||
* @param[in] d_spatial_res_ratio
|
||||
* Resolution Ratio b/w spatial layers
|
||||
*
|
||||
* @param[in] u4_wd
|
||||
* Input Width
|
||||
*
|
||||
* @param[in] u4_ht
|
||||
* Input Height
|
||||
*
|
||||
* @returns Size of buffers
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
UWORD32 isvce_get_ilp_mv_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio,
                                   UWORD32 u4_wd, UWORD32 u4_ht)
{
    UWORD32 u4_size = 0;

    /* No ILP MV buffers are required when there is a single spatial layer */
    if(u1_num_spatial_layers > 1)
    {
        WORD32 i;

        /* One ctxt and one state struct for each process ctxt */
        u4_size += MAX_PROCESS_CTXT * sizeof(svc_ilp_mv_ctxt_t);
        u4_size += MAX_PROCESS_CTXT * sizeof(ilp_mv_state_t);

        /* One layer state struct for each spatial layer */
        u4_size += u1_num_spatial_layers * sizeof(ilp_mv_layer_state_t);

        /* One MB state struct for each MB of every layer except layer 0 */
        for(i = u1_num_spatial_layers - 1; i >= 1; i--)
        {
            DOUBLE d_layer_scale = pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i);

            /* 0.99 has to be added before the conversion to integer, for both */
            /* dimensions. Converting first discards the fraction and makes the */
            /* addition a no-op, which under-counts MBs relative to the layer */
            /* dimensions used by isvce_ilp_mv_ctxt_init */
            WORD32 i4_layer_luma_wd = (WORD32) (((DOUBLE) u4_wd / d_layer_scale) + 0.99);
            WORD32 i4_layer_luma_ht = (WORD32) (((DOUBLE) u4_ht / d_layer_scale) + 0.99);
            WORD32 i4_layer_luma_mbs = (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE);

            u4_size += i4_layer_luma_mbs * sizeof(ilp_mv_mb_state_t);
        }
    }

    return u4_size;
}
|
||||
|
||||
static FORCEINLINE void isvce_ref_layer_pu_and_mb_pos_init(layer_resampler_props_t *ps_layer_props,
|
||||
ilp_mv_mb_state_t *ps_mb_state,
|
||||
coordinates_t *ps_mb_pos,
|
||||
UWORD32 u4_ref_wd, UWORD32 u4_ref_ht,
|
||||
UWORD8 u1_field_pic_flag,
|
||||
UWORD8 u1_field_mb_flag)
|
||||
{
|
||||
UWORD32 i, j;
|
||||
|
||||
coordinates_t(*aps_pu_positions)[MAX_PU_IN_MB_ROW] = ps_mb_state->as_pu_positions;
|
||||
coordinates_t(*aps_mb_positions)[MAX_PU_IN_MB_ROW] = ps_mb_state->as_mb_positions;
|
||||
|
||||
for(i = 0; i < MAX_PU_IN_MB_COL; i++)
|
||||
{
|
||||
UWORD32 u4_y_ref16;
|
||||
|
||||
UWORD32 u4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht +
|
||||
(4 * i + 1) * (1 + u1_field_mb_flag - u1_field_pic_flag);
|
||||
|
||||
u4_y_ref16 =
|
||||
(u4_yc * ps_layer_props->u4_scale_y + (1 << (ps_layer_props->u4_shift_y - 1))) >>
|
||||
ps_layer_props->u4_shift_y;
|
||||
u4_y_ref16 = MIN(u4_y_ref16, u4_ref_ht - 1);
|
||||
|
||||
for(j = 0; j < MAX_PU_IN_MB_ROW; j++)
|
||||
{
|
||||
UWORD32 u4_x_ref16;
|
||||
|
||||
UWORD32 u4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + 4 * j + 1;
|
||||
|
||||
u4_x_ref16 =
|
||||
(u4_xc * ps_layer_props->u4_scale_x + (1 << (ps_layer_props->u4_shift_x - 1))) >>
|
||||
ps_layer_props->u4_shift_x;
|
||||
u4_x_ref16 = MIN(u4_x_ref16, u4_ref_wd - 1);
|
||||
|
||||
aps_pu_positions[i][j].i4_abscissa = u4_x_ref16;
|
||||
aps_pu_positions[i][j].i4_ordinate = u4_y_ref16;
|
||||
|
||||
aps_mb_positions[i][j].i4_abscissa = (u4_x_ref16 / MB_SIZE);
|
||||
aps_mb_positions[i][j].i4_ordinate = (u4_y_ref16 / MB_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void isvce_ilp_mv_layer_state_init(ilp_mv_layer_state_t *ps_layer_state,
|
||||
DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht)
|
||||
{
|
||||
UWORD32 i, j;
|
||||
|
||||
const UWORD8 u1_ref_layer_field_pic_flag = 0;
|
||||
const UWORD8 u1_field_pic_flag = 0;
|
||||
const UWORD8 u1_field_mb_flag = 0;
|
||||
|
||||
ilp_mv_mb_state_t *ps_mb_states;
|
||||
layer_resampler_props_t *ps_layer_props;
|
||||
|
||||
UWORD32 u4_wd_in_mbs;
|
||||
UWORD32 u4_ht_in_mbs;
|
||||
|
||||
UWORD32 u4_ref_wd = (u4_wd / d_spatial_res_ratio);
|
||||
UWORD32 u4_ref_ht = (u4_ht / d_spatial_res_ratio) * (1 + u1_ref_layer_field_pic_flag);
|
||||
UWORD32 u4_scaled_wd = u4_wd;
|
||||
UWORD32 u4_scaled_ht = u4_ht * (1 + u1_field_pic_flag);
|
||||
|
||||
ps_mb_states = ps_layer_state->ps_mb_states;
|
||||
ps_layer_props = ps_layer_state->ps_props;
|
||||
|
||||
u4_wd_in_mbs = u4_scaled_wd / ps_layer_props->u4_mb_wd;
|
||||
u4_ht_in_mbs = u4_scaled_ht / ps_layer_props->u4_mb_ht;
|
||||
|
||||
ps_layer_state->s_mv_scale.i4_abscissa = ((u4_scaled_wd << 16) + (u4_ref_wd >> 1)) / u4_ref_wd;
|
||||
ps_layer_state->s_mv_scale.i4_ordinate = ((u4_scaled_ht << 16) + (u4_ref_ht >> 1)) / u4_ref_ht;
|
||||
|
||||
for(i = 0; i < u4_ht_in_mbs; i++)
|
||||
{
|
||||
for(j = 0; j < u4_wd_in_mbs; j++)
|
||||
{
|
||||
coordinates_t s_mb_pos = {j, i};
|
||||
|
||||
isvce_ref_layer_pu_and_mb_pos_init(ps_layer_props, &ps_mb_states[j + i * u4_wd_in_mbs],
|
||||
&s_mb_pos, u4_ref_wd, u4_ref_ht, u1_field_pic_flag,
|
||||
u1_field_mb_flag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Function to initialize svc ilp buffers
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* Pointer to codec context
|
||||
*
|
||||
* @param[in] ps_mem_rec
|
||||
* Pointer to memory allocated for input buffers
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_ilp_mv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec)
{
    WORD32 i, j;

    const WORD32 i4_num_proc_ctxts = sizeof(ps_codec->as_process) / sizeof(ps_codec->as_process[0]);
    UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;

    if(u1_num_spatial_layers > 1)
    {
        /* Both are written while handling process ctxt 0 and only read for the */
        /* later ones; they are initialised so that no path can read an */
        /* indeterminate pointer */
        ilp_mv_layer_state_t *ps_layer_states = NULL;
        ilp_mv_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS] = {NULL};

        DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio;
        UWORD32 u4_wd = ps_codec->s_cfg.u4_wd;
        UWORD32 u4_ht = ps_codec->s_cfg.u4_ht;
        UWORD8 *pu1_buf = ps_mem_rec->pv_base;

        /* Budget of bytes that may be carved out of the memory record. It is */
        /* reduced by exactly the amount 'pu1_buf' advances, so a negative value */
        /* means more memory is being consumed than was requested */
        WORD64 i8_alloc_mem_size =
            isvce_get_ilp_mv_ctxt_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht);

        for(i = 0; i < i4_num_proc_ctxts; i++)
        {
            ilp_mv_state_t *ps_ilp_mv_state;
            svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt;

            isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i;

            /* Every process ctxt gets its own ILP MV ctxt and state struct */
            ps_ilp_mv_ctxt = ps_proc->ps_svc_ilp_mv_ctxt = (svc_ilp_mv_ctxt_t *) pu1_buf;
            pu1_buf += sizeof(svc_ilp_mv_ctxt_t);
            i8_alloc_mem_size -= sizeof(svc_ilp_mv_ctxt_t);

            ps_ilp_mv_ctxt->s_ilp_mv_constants.pv_state = pu1_buf;
            ps_ilp_mv_state = (ilp_mv_state_t *) pu1_buf;
            pu1_buf += sizeof(ilp_mv_state_t);
            i8_alloc_mem_size -= sizeof(ilp_mv_state_t);

            /* The layer state array is allocated once and shared by all process ctxts */
            if(0 == i)
            {
                ps_ilp_mv_state->ps_layer_state = (ilp_mv_layer_state_t *) pu1_buf;
                ps_layer_states = ps_ilp_mv_state->ps_layer_state;
                pu1_buf += u1_num_spatial_layers * sizeof(ps_ilp_mv_state->ps_layer_state[0]);
                i8_alloc_mem_size -=
                    u1_num_spatial_layers * sizeof(ps_ilp_mv_state->ps_layer_state[0]);
            }
            else
            {
                ps_ilp_mv_state->ps_layer_state = ps_layer_states;
            }

            ASSERT(i8_alloc_mem_size >= 0);

            if(0 == i)
            {
                /* MB states are allocated and initialised once, for every layer */
                /* except layer 0 */
                for(j = u1_num_spatial_layers - 1; j >= 1; j--)
                {
                    ilp_mv_layer_state_t *ps_layer = &ps_ilp_mv_state->ps_layer_state[j];

                    WORD32 i4_layer_luma_wd =
                        ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
                        0.99;
                    WORD32 i4_layer_luma_ht =
                        ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
                        0.99;
                    WORD32 i4_layer_luma_mbs =
                        (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE);

                    ps_layer->ps_mb_states = (ilp_mv_mb_state_t *) pu1_buf;
                    aps_luma_mb_states[j] = ps_layer->ps_mb_states;
                    pu1_buf += i4_layer_luma_mbs * sizeof(ps_layer->ps_mb_states[0]);
                    /* The budget must shrink by the number of MB states consumed. */
                    /* Using the number of spatial layers here instead would make */
                    /* the ASSERT below unable to detect an overrun */
                    i8_alloc_mem_size -= i4_layer_luma_mbs * sizeof(ps_layer->ps_mb_states[0]);

                    ASSERT(i8_alloc_mem_size >= 0);
                    /* Asserts below verify that
                     * 'ps_codec->s_svc_ilp_data.aps_layer_resampler_props' is initialised
                     */
                    ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j].u4_mb_wd ==
                           MB_SIZE);

                    ps_layer->ps_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j];

                    isvce_ilp_mv_layer_state_init(ps_layer, d_spatial_res_ratio, i4_layer_luma_wd,
                                                  i4_layer_luma_ht);
                }
            }
            else
            {
                /* Point at the MB states and props set up for process ctxt 0 */
                for(j = u1_num_spatial_layers - 1; j >= 1; j--)
                {
                    ilp_mv_layer_state_t *ps_layer = &ps_ilp_mv_state->ps_layer_state[j];

                    ps_layer->ps_mb_states = aps_luma_mb_states[j];

                    ps_layer->ps_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j];
                }
            }
        }
    }
    else
    {
        /* With a single spatial layer there is no ILP MV ctxt */
        for(i = 0; i < i4_num_proc_ctxts; i++)
        {
            ps_codec->as_process[i].ps_svc_ilp_mv_ctxt = NULL;
        }
    }
}
|
||||
|
||||
static void isvce_get_ilp_mvs_for_me(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt)
|
||||
{
|
||||
svc_layer_data_t *ps_ref_layer_data;
|
||||
ilp_mv_layer_state_t *ps_layer_state;
|
||||
ilp_mv_mb_state_t *ps_mb_state;
|
||||
isvce_mb_info_t *ps_ref_mb_info;
|
||||
coordinates_t s_frame_dims;
|
||||
coordinates_t s_frame_dims_in_mbs;
|
||||
coordinates_t s_ref_frame_dims;
|
||||
coordinates_t s_ref_frame_dims_in_mbs;
|
||||
|
||||
bool b_is_mv_non_identical;
|
||||
WORD32 i, j, k;
|
||||
|
||||
ilp_mv_constants_t *ps_ilp_mv_constants = &ps_ilp_mv_ctxt->s_ilp_mv_constants;
|
||||
ilp_mv_variables_t *ps_ilp_mv_variables = &ps_ilp_mv_ctxt->s_ilp_mv_variables;
|
||||
ilp_mv_outputs_t *ps_ilp_mv_outputs = &ps_ilp_mv_ctxt->s_ilp_mv_outputs;
|
||||
ilp_mv_state_t *ps_ilp_mv_state = (ilp_mv_state_t *) ps_ilp_mv_constants->pv_state;
|
||||
svc_ilp_data_t *ps_svc_ilp_data = ps_ilp_mv_variables->ps_svc_ilp_data;
|
||||
svc_au_data_t *ps_svc_au_data = ps_svc_ilp_data->ps_svc_au_data;
|
||||
coordinates_t *ps_mb_pos = &ps_ilp_mv_variables->s_mb_pos;
|
||||
const isvce_enc_pu_mv_t s_default_mv = {{0, 0}, -1};
|
||||
|
||||
UWORD8 u1_spatial_layer_id = ps_ilp_mv_variables->u1_spatial_layer_id;
|
||||
WORD32 i4_num_ilp_mvs = 0;
|
||||
|
||||
s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width;
|
||||
s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height;
|
||||
s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE;
|
||||
s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE;
|
||||
s_ref_frame_dims.i4_abscissa =
|
||||
ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_width;
|
||||
s_ref_frame_dims.i4_ordinate =
|
||||
ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_height;
|
||||
s_ref_frame_dims_in_mbs.i4_abscissa = s_ref_frame_dims.i4_abscissa / MB_SIZE;
|
||||
s_ref_frame_dims_in_mbs.i4_ordinate = s_ref_frame_dims.i4_ordinate / MB_SIZE;
|
||||
|
||||
ps_ref_layer_data = &ps_svc_au_data->ps_svc_layer_data[u1_spatial_layer_id - 1];
|
||||
ps_layer_state = &ps_ilp_mv_state->ps_layer_state[u1_spatial_layer_id];
|
||||
ps_mb_state =
|
||||
&ps_layer_state->ps_mb_states[ps_mb_pos->i4_abscissa +
|
||||
ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa];
|
||||
|
||||
for(i = 0; i < MAX_PU_IN_MB_COL; i++)
|
||||
{
|
||||
for(j = 0; j < MAX_PU_IN_MB_ROW; j++)
|
||||
{
|
||||
b_is_mv_non_identical = true;
|
||||
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] = s_default_mv;
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] = s_default_mv;
|
||||
|
||||
ps_ref_mb_info =
|
||||
&ps_ref_layer_data->ps_mb_info[ps_mb_state->as_mb_positions[i][j].i4_abscissa +
|
||||
ps_mb_state->as_mb_positions[i][j].i4_ordinate *
|
||||
s_ref_frame_dims_in_mbs.i4_abscissa];
|
||||
|
||||
if((ps_ref_mb_info->u2_mb_type == P16x16) || (ps_ref_mb_info->u2_mb_type == B16x16))
|
||||
{
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] =
|
||||
ps_ref_mb_info->u2_mb_type;
|
||||
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] =
|
||||
ps_ref_mb_info->as_pu->u1_pred_mode;
|
||||
|
||||
if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L0)
|
||||
{
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] =
|
||||
ps_ref_mb_info->as_pu->as_me_info[L1];
|
||||
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx =
|
||||
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx *
|
||||
ps_layer_state->s_mv_scale.i4_abscissa +
|
||||
32768) >>
|
||||
16;
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy =
|
||||
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy *
|
||||
ps_layer_state->s_mv_scale.i4_ordinate +
|
||||
32768) >>
|
||||
16;
|
||||
}
|
||||
|
||||
if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L1)
|
||||
{
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] =
|
||||
ps_ref_mb_info->as_pu->as_me_info[L0];
|
||||
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx =
|
||||
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx *
|
||||
ps_layer_state->s_mv_scale.i4_abscissa +
|
||||
32768) >>
|
||||
16;
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy =
|
||||
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy *
|
||||
ps_layer_state->s_mv_scale.i4_ordinate +
|
||||
32768) >>
|
||||
16;
|
||||
}
|
||||
|
||||
if(i4_num_ilp_mvs == 0)
|
||||
{
|
||||
i4_num_ilp_mvs++;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(k = i4_num_ilp_mvs - 1; k >= 0; k--)
|
||||
{
|
||||
if((ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[k] ==
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs]) &&
|
||||
(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k] ==
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs]) &&
|
||||
isvce_check_identical_mv(
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[k],
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs],
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k]))
|
||||
{
|
||||
b_is_mv_non_identical = false;
|
||||
}
|
||||
}
|
||||
|
||||
if(b_is_mv_non_identical)
|
||||
{
|
||||
i4_num_ilp_mvs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] = INVALID_MB_TYPE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.u4_num_ilp_mvs = i4_num_ilp_mvs;
|
||||
|
||||
for(i = 0; i < MAX_ILP_MV_IN_NBR_RGN; i++)
|
||||
{
|
||||
b_is_mv_non_identical = true;
|
||||
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] = s_default_mv;
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] = s_default_mv;
|
||||
|
||||
if(ps_mb_pos->i4_abscissa + gai1_nbr_ilp_mv_map[i][0] >= 0 &&
|
||||
ps_mb_pos->i4_abscissa + gai1_nbr_ilp_mv_map[i][0] < s_frame_dims_in_mbs.i4_abscissa &&
|
||||
ps_mb_pos->i4_ordinate + gai1_nbr_ilp_mv_map[i][1] >= 0 &&
|
||||
ps_mb_pos->i4_ordinate + gai1_nbr_ilp_mv_map[i][1] < s_frame_dims_in_mbs.i4_ordinate)
|
||||
{
|
||||
ps_mb_state =
|
||||
&ps_layer_state->ps_mb_states[(ps_mb_pos->i4_abscissa + gai1_nbr_ilp_mv_map[i][0]) +
|
||||
(ps_mb_pos->i4_ordinate + gai1_nbr_ilp_mv_map[i][1]) *
|
||||
s_frame_dims_in_mbs.i4_abscissa];
|
||||
|
||||
ps_ref_mb_info =
|
||||
&ps_ref_layer_data->ps_mb_info[(ps_mb_state
|
||||
->as_mb_positions[gai1_nbr_ilp_mv_map[i][2]]
|
||||
[gai1_nbr_ilp_mv_map[i][3]]
|
||||
.i4_abscissa) +
|
||||
ps_mb_state
|
||||
->as_mb_positions[gai1_nbr_ilp_mv_map[i][2]]
|
||||
[gai1_nbr_ilp_mv_map[i][3]]
|
||||
.i4_ordinate *
|
||||
s_ref_frame_dims_in_mbs.i4_abscissa];
|
||||
|
||||
if((ps_ref_mb_info->u2_mb_type == P16x16) || (ps_ref_mb_info->u2_mb_type == B16x16))
|
||||
{
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] =
|
||||
ps_ref_mb_info->u2_mb_type;
|
||||
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] =
|
||||
ps_ref_mb_info->as_pu->u1_pred_mode;
|
||||
|
||||
if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L0)
|
||||
{
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] =
|
||||
ps_ref_mb_info->as_pu->as_me_info[L1];
|
||||
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx =
|
||||
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx *
|
||||
ps_layer_state->s_mv_scale.i4_abscissa +
|
||||
32768) >>
|
||||
16;
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy =
|
||||
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy *
|
||||
ps_layer_state->s_mv_scale.i4_ordinate +
|
||||
32768) >>
|
||||
16;
|
||||
}
|
||||
|
||||
if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L1)
|
||||
{
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] =
|
||||
ps_ref_mb_info->as_pu->as_me_info[L0];
|
||||
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx =
|
||||
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx *
|
||||
ps_layer_state->s_mv_scale.i4_abscissa +
|
||||
32768) >>
|
||||
16;
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy =
|
||||
(ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy *
|
||||
ps_layer_state->s_mv_scale.i4_ordinate +
|
||||
32768) >>
|
||||
16;
|
||||
}
|
||||
|
||||
if(i4_num_ilp_mvs == 0)
|
||||
{
|
||||
i4_num_ilp_mvs++;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(k = i4_num_ilp_mvs - 1; k >= 0; k--)
|
||||
{
|
||||
if((ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[k] ==
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs]) &&
|
||||
(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k] ==
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs]) &&
|
||||
isvce_check_identical_mv(
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[k],
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs],
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k]))
|
||||
b_is_mv_non_identical = false;
|
||||
}
|
||||
|
||||
if(b_is_mv_non_identical)
|
||||
{
|
||||
i4_num_ilp_mvs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] = INVALID_MB_TYPE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ps_ilp_mv_outputs->s_ilp_me_cands.u4_num_ilp_mvs_incl_nbrs = i4_num_ilp_mvs;
|
||||
}
|
||||
|
||||
/**
 *******************************************************************************
 *
 * @brief
 *  Derives the 16x16 inter-layer predicted (ILP) MV of the current MB
 *
 * @par Description
 *  Seeds the output with the motion data of the reference-layer MB mapped to
 *  as_mb_positions[0][0], triggers the ME candidate derivation, and then keeps
 *  the 16x16 ILP MV only when every mapped reference-layer MB has the same MB
 *  type, prediction mode and MVs. A surviving MV is scaled by the layer's
 *  s_mv_scale.
 *
 * @param[in,out] ps_ilp_mv_ctxt
 *  ILP MV context; s_ilp_mv_variables and s_ilp_mv_constants are read,
 *  s_ilp_mv_outputs is written
 *
 * @returns none
 *
 * @remarks
 *  The reference layer is addressed as (u1_spatial_layer_id - 1), so the
 *  caller is presumably expected to invoke this only for enhancement layers
 *  - TODO confirm.
 *
 *******************************************************************************
 */
void isvce_get_mb_ilp_mv(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt)
{
    WORD32 m, n;

    /* MV used whenever no valid ILP MV exists: zero vector, ref_idx = -1 */
    const isvce_enc_pu_mv_t s_default_mv = {{0, 0}, -1};

    ilp_mv_variables_t *ps_vars = &ps_ilp_mv_ctxt->s_ilp_mv_variables;
    ilp_mv_t *ps_ilp_mv = &ps_ilp_mv_ctxt->s_ilp_mv_outputs.s_ilp_mv;
    ilp_mv_state_t *ps_state = (ilp_mv_state_t *) ps_ilp_mv_ctxt->s_ilp_mv_constants.pv_state;
    svc_ilp_data_t *ps_svc_ilp_data = ps_vars->ps_svc_ilp_data;
    coordinates_t *ps_mb_pos = &ps_vars->s_mb_pos;

    const UWORD8 u1_spatial_layer_id = ps_vars->u1_spatial_layer_id;
    const WORD32 i4_ref_layer_id = u1_spatial_layer_id - 1;

    /* Widths of the current and the reference layer, in MB units */
    const WORD32 i4_wd_in_mbs =
        ((WORD32) ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width) / MB_SIZE;
    const WORD32 i4_ref_wd_in_mbs =
        ((WORD32) ps_svc_ilp_data->ps_residual_bufs[i4_ref_layer_id].u4_width) / MB_SIZE;

    svc_layer_data_t *ps_ref_layer_data =
        &ps_svc_ilp_data->ps_svc_au_data->ps_svc_layer_data[i4_ref_layer_id];
    ilp_mv_layer_state_t *ps_layer_state = &ps_state->ps_layer_state[u1_spatial_layer_id];
    ilp_mv_mb_state_t *ps_mb_state =
        &ps_layer_state
             ->ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * i4_wd_in_mbs];

    /* Reference-layer MB mapped to the first entry of the position table */
    coordinates_t *ps_ref_pos = &ps_mb_state->as_mb_positions[0][0];
    isvce_mb_info_t *ps_ref_mb_info =
        &ps_ref_layer_data
             ->ps_mb_info[ps_ref_pos->i4_abscissa + ps_ref_pos->i4_ordinate * i4_ref_wd_in_mbs];

    ps_ilp_mv->as_mv[0][L0] = s_default_mv;
    ps_ilp_mv->as_mv[0][L1] = s_default_mv;

    if((ps_ref_mb_info->u2_mb_type != P16x16) && (ps_ref_mb_info->u2_mb_type != B16x16))
    {
        ps_ilp_mv->e_mb_type = INVALID_MB_TYPE;
    }
    else
    {
        ps_ilp_mv->e_mb_type = ps_ref_mb_info->u2_mb_type;
        ps_ilp_mv->ae_pred_mode[0] = ps_ref_mb_info->as_pu->u1_pred_mode;

        /* A list's MV is copied unless the mode names the other list only */
        if(ps_ilp_mv->ae_pred_mode[0] != L1)
        {
            ps_ilp_mv->as_mv[0][L0] = ps_ref_mb_info->as_pu->as_me_info[L0];
        }

        if(ps_ilp_mv->ae_pred_mode[0] != L0)
        {
            ps_ilp_mv->as_mv[0][L1] = ps_ref_mb_info->as_pu->as_me_info[L1];
        }
    }

    /* Function call to get non 16x16 ilp mvs for me candidates */
    isvce_get_ilp_mvs_for_me(ps_ilp_mv_ctxt);

    /* Encoder supports only 16x16 partition. */
    /* Every mapped reference MB must agree with the seed, else no ILP MV */
    for(m = 0; m < MAX_PU_IN_MB_COL; m++)
    {
        for(n = 0; n < MAX_PU_IN_MB_ROW; n++)
        {
            ps_ref_pos = &ps_mb_state->as_mb_positions[m][n];
            ps_ref_mb_info =
                &ps_ref_layer_data->ps_mb_info[ps_ref_pos->i4_abscissa +
                                               ps_ref_pos->i4_ordinate * i4_ref_wd_in_mbs];

            if((ps_ref_mb_info->u2_mb_type == ps_ilp_mv->e_mb_type) &&
               (ps_ilp_mv->ae_pred_mode[0] == ps_ref_mb_info->as_pu->u1_pred_mode) &&
               isvce_check_identical_mv(ps_ilp_mv->as_mv[0], ps_ref_mb_info->as_pu->as_me_info,
                                        ps_ilp_mv->ae_pred_mode[0]))
            {
                continue;
            }

            /* NOTE(review): unlike the final invalid path below, this exit */
            /* leaves ae_pred_mode[0] untouched - confirm that is intended */
            ps_ilp_mv->as_mv[0][L0] = s_default_mv;
            ps_ilp_mv->as_mv[0][L1] = s_default_mv;
            ps_ilp_mv->e_mb_type = INVALID_MB_TYPE;

            return;
        }
    }

    if(ps_ilp_mv->e_mb_type == INVALID_MB_TYPE)
    {
        ps_ilp_mv->e_mb_type = INVALID_MB_TYPE;
        ps_ilp_mv->ae_pred_mode[0] = INVALID_PRED_MODE;

        return;
    }

    /* Scale the surviving MVs: multiply by s_mv_scale, add 32768 and shift */
    /* right by 16 (looks like a rounded Q16 scale factor - TODO confirm) */
    if(ps_ilp_mv->ae_pred_mode[0] != L0)
    {
        isvce_enc_pu_mv_t *ps_l1_mv = &ps_ilp_mv->as_mv[0][L1];

        ps_l1_mv->s_mv.i2_mvx =
            (ps_l1_mv->s_mv.i2_mvx * ps_layer_state->s_mv_scale.i4_abscissa + 32768) >> 16;
        ps_l1_mv->s_mv.i2_mvy =
            (ps_l1_mv->s_mv.i2_mvy * ps_layer_state->s_mv_scale.i4_ordinate + 32768) >> 16;
    }

    if(ps_ilp_mv->ae_pred_mode[0] != L1)
    {
        isvce_enc_pu_mv_t *ps_l0_mv = &ps_ilp_mv->as_mv[0][L0];

        ps_l0_mv->s_mv.i2_mvx =
            (ps_l0_mv->s_mv.i2_mvx * ps_layer_state->s_mv_scale.i4_abscissa + 32768) >> 16;
        ps_l0_mv->s_mv.i2_mvy =
            (ps_l0_mv->s_mv.i2_mvy * ps_layer_state->s_mv_scale.i4_ordinate + 32768) >> 16;
    }
}
|
||||
|
||||
/**
 *******************************************************************************
 *
 * @brief
 *  Chooses, per reference list, between the spatial MVP and the ILP MVP
 *
 * @par Description
 *  For each list whose MV is in use, the MVD cost against the spatial MVP
 *  (index 0) and the ILP MVP (index 1) is looked up in pu1_mvd_costs, and
 *  au1_mvp_idx is set to 1 only when the ILP MVP is strictly cheaper.
 *  The index is forced to 0 for intra, PSKIP, BSKIP and BASE_MODE MBs, when
 *  no ILP MVP is supplied, or when USE_ILP_MV_AS_MVP is disabled.
 *
 * @param[in,out] ps_mb_info
 *  MB info; as_pu->au1_mvp_idx[] is written
 *
 * @param[in] ps_spatial_mvp
 *  Spatial MV predictors, indexed by list
 *
 * @param[in] ps_ilp_mvp
 *  ILP MV predictors, indexed by list; may be NULL
 *
 * @param[in] pu1_mvd_costs
 *  Cost table indexed by the signed MV difference; the index can be
 *  negative, so this presumably points into the middle of the table
 *  - TODO confirm against caller
 *
 * @returns none
 *
 *******************************************************************************
 */
void isvce_mvp_idx_eval(isvce_mb_info_t *ps_mb_info, isvce_enc_pu_mv_t *ps_spatial_mvp,
                        isvce_enc_pu_mv_t *ps_ilp_mvp, UWORD8 *pu1_mvd_costs)
{
    WORD32 i4_dir;

    const WORD32 i4_eval_ilp_mvp =
        USE_ILP_MV_AS_MVP && ps_ilp_mvp && !ps_mb_info->u1_is_intra &&
        (ps_mb_info->u2_mb_type != PSKIP) && (ps_mb_info->u2_mb_type != BSKIP) &&
        (ps_mb_info->u2_mb_type != BASE_MODE);

    if(!i4_eval_ilp_mvp)
    {
        ps_mb_info->as_pu->au1_mvp_idx[L0] = 0;
        ps_mb_info->as_pu->au1_mvp_idx[L1] = 0;

        return;
    }

    for(i4_dir = 0; i4_dir < NUM_PRED_DIRS; i4_dir++)
    {
        const PRED_MODE_T e_other_list = (PRED_MODE_T) i4_dir;
        const PRED_MODE_T e_list = (e_other_list == L0) ? L1 : L0;

        if(ps_mb_info->as_pu->u1_pred_mode != e_other_list)
        {
            /* e_list is in use: compare the two predictors for its MV */
            const isvce_enc_pu_mv_t *ps_mv = &ps_mb_info->as_pu->as_me_info[e_list];
            const isvce_enc_pu_mv_t *ps_spatial = &ps_spatial_mvp[e_list];
            const isvce_enc_pu_mv_t *ps_ilp = &ps_ilp_mvp[e_list];

            /* An unusable predictor keeps the maximum cost */
            WORD32 i4_spatial_cost = INT32_MAX;
            WORD32 i4_ilp_cost = INT32_MAX;

            if(ps_spatial->i1_ref_idx != -1)
            {
                i4_spatial_cost = pu1_mvd_costs[ps_mv->s_mv.i2_mvx - ps_spatial->s_mv.i2_mvx] +
                                  pu1_mvd_costs[ps_mv->s_mv.i2_mvy - ps_spatial->s_mv.i2_mvy];
            }

            /* The ILP MVP additionally has to share the MV's ref_idx */
            if((ps_ilp->i1_ref_idx != -1) && (ps_mv->i1_ref_idx == ps_ilp->i1_ref_idx))
            {
                i4_ilp_cost = pu1_mvd_costs[ps_mv->s_mv.i2_mvx - ps_ilp->s_mv.i2_mvx] +
                              pu1_mvd_costs[ps_mv->s_mv.i2_mvy - ps_ilp->s_mv.i2_mvy];
            }

            /* Ties go to the spatial MVP (index 0) */
            ps_mb_info->as_pu->au1_mvp_idx[e_list] = i4_spatial_cost > i4_ilp_cost;
        }
        else
        {
            ps_mb_info->as_pu->au1_mvp_idx[e_list] = 0;
        }
    }
}
|
||||
115
encoder/svc/isvce_ilp_mv.h
Normal file
115
encoder/svc/isvce_ilp_mv.h
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_ilp_mv.h
|
||||
*
|
||||
* @brief
|
||||
* Contains declarations for the functions defined in
|
||||
* isvce_ilp_mv.c
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_ILP_MV_H_
|
||||
#define _ISVCE_ILP_MV_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "isvce_pred_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvce_utils.h"
|
||||
|
||||
/* Structs */
|
||||
/* Per-context constant data of the ILP MV module */
typedef struct ilp_mv_constants_t
{
    /* Opaque module state; isvce_get_mb_ilp_mv casts it to ilp_mv_state_t * */
    void *pv_state;
} ilp_mv_constants_t;
|
||||
|
||||
typedef struct ilp_mv_outputs_t
|
||||
{
|
||||
ilp_mv_t s_ilp_mv;
|
||||
|
||||
ilp_me_cands_t s_ilp_me_cands;
|
||||
|
||||
} ilp_mv_outputs_t;
|
||||
|
||||
typedef struct ilp_mv_variables_t
|
||||
{
|
||||
svc_ilp_data_t *ps_svc_ilp_data;
|
||||
|
||||
coordinates_t s_mb_pos;
|
||||
|
||||
UWORD8 u1_spatial_layer_id;
|
||||
} ilp_mv_variables_t;
|
||||
|
||||
typedef struct svc_ilp_mv_ctxt_t
|
||||
{
|
||||
ilp_mv_constants_t s_ilp_mv_constants;
|
||||
|
||||
ilp_mv_variables_t s_ilp_mv_variables;
|
||||
|
||||
ilp_mv_outputs_t s_ilp_mv_outputs;
|
||||
|
||||
} svc_ilp_mv_ctxt_t;
|
||||
|
||||
/* Function declarations */
|
||||
extern UWORD32 isvce_get_ilp_mv_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio,
|
||||
UWORD32 u4_wd, UWORD32 u4_ht);
|
||||
|
||||
extern void isvce_ilp_mv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
|
||||
|
||||
extern void isvce_get_mb_ilp_mv(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt);
|
||||
|
||||
extern void isvce_mvp_idx_eval(isvce_mb_info_t *ps_mb_info, isvce_enc_pu_mv_t *ps_spatial_mvp,
|
||||
isvce_enc_pu_mv_t *ps_ilp_mvp, UWORD8 *pu1_mvd_costs);
|
||||
|
||||
/**
 *******************************************************************************
 *
 * @brief
 *  Tells whether the MB's chosen motion data coincides with the ILP MV
 *
 * @param[in] ps_mb_info
 *  MB info holding the selected MB type, prediction mode and MVs
 *
 * @param[in] ps_ilp_mv
 *  ILP MV of the MB; may be NULL
 *
 * @returns Result of isvce_check_identical_mv when the MB is not PSKIP/BSKIP
 *  and its MB type and prediction mode equal those of the ILP MV; 0 otherwise,
 *  including when ENABLE_ILP_MV is off or ps_ilp_mv is NULL
 *
 *******************************************************************************
 */
static FORCEINLINE UWORD8 isvce_is_ilp_mv_winning_mv(isvce_mb_info_t *ps_mb_info,
                                                     ilp_mv_t *ps_ilp_mv)
{
    const WORD32 i4_ilp_mv_usable = ENABLE_ILP_MV && ps_ilp_mv &&
                                    (ps_mb_info->u2_mb_type != PSKIP) &&
                                    (ps_mb_info->u2_mb_type != BSKIP);

    if(!i4_ilp_mv_usable)
    {
        return 0;
    }

    if(ps_mb_info->u2_mb_type != ps_ilp_mv->e_mb_type)
    {
        return 0;
    }

    if(((PRED_MODE_T) ps_mb_info->as_pu->u1_pred_mode) != ps_ilp_mv->ae_pred_mode[0])
    {
        return 0;
    }

    /* Same MB type and prediction mode: the MVs decide */
    return isvce_check_identical_mv(ps_mb_info->as_pu->as_me_info, ps_ilp_mv->as_mv[0],
                                    ps_ilp_mv->ae_pred_mode[0]);
}
|
||||
|
||||
#endif
|
||||
68
encoder/svc/isvce_ilp_mv_private_defs.h
Normal file
68
encoder/svc/isvce_ilp_mv_private_defs.h
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_ilp_mv_private_defs.h
|
||||
*
|
||||
* @brief
|
||||
* Contains datatype and macro definitions used exclusively in
|
||||
* ILP MV derivations
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_ILP_MV_PRIVATE_DEFS_H_
|
||||
#define _ISVCE_ILP_MV_PRIVATE_DEFS_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
/* Structs */
|
||||
/* Offsets, etc used for resLayer MV upsampling */
|
||||
/* Derived as per 'G.8.6.1.1' for all MB's once during init */
|
||||
typedef struct ilp_mv_mb_state_t
|
||||
{
|
||||
coordinates_t as_pu_positions[MAX_PU_IN_MB_COL][MAX_PU_IN_MB_ROW];
|
||||
|
||||
coordinates_t as_mb_positions[MAX_PU_IN_MB_COL][MAX_PU_IN_MB_ROW];
|
||||
} ilp_mv_mb_state_t;
|
||||
|
||||
typedef struct ilp_mv_layer_state_t
|
||||
{
|
||||
layer_resampler_props_t *ps_props;
|
||||
|
||||
ilp_mv_mb_state_t *ps_mb_states;
|
||||
|
||||
coordinates_t s_mv_scale;
|
||||
|
||||
} ilp_mv_layer_state_t;
|
||||
|
||||
typedef struct ilp_mv_state_t
|
||||
{
|
||||
/* Array of size numSpatialLayers */
|
||||
ilp_mv_layer_state_t *ps_layer_state;
|
||||
|
||||
} ilp_mv_state_t;
|
||||
|
||||
#endif
|
||||
111
encoder/svc/isvce_ilp_mv_utils.h
Normal file
111
encoder/svc/isvce_ilp_mv_utils.h
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_ilp_mv_utils.h
|
||||
*
|
||||
* @brief
|
||||
* Defs to perform experiments in ilp mv
|
||||
*
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#ifndef _ISVCE_ILP_MV_UTILS_H_
|
||||
#define _ISVCE_ILP_MV_UTILS_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "isvce_pred_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
#define MAX_CAND_IF_NUM_ILP_MV_LT_2 8
|
||||
#define MAX_CAND_IF_NUM_ILP_MV_GTEQ_2 6
|
||||
|
||||
/* nbr_mb.x, nbr_mb.y, pu_pos.x, pu_pos.y */
|
||||
#define NBR_PU_AND_MB_POS 4
|
||||
|
||||
static const WORD8 gai1_nbr_ilp_mv_map[MAX_ILP_MV_IN_NBR_RGN][NBR_PU_AND_MB_POS] = {
|
||||
{-1, 0, 3, 0},
|
||||
{0, -1, 0, 3},
|
||||
{1, 0, 0, 0},
|
||||
{0, 1, 0, 0},
|
||||
};
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function checks if the max difference between ILP MVs is less than four
|
||||
* or not if number of ILP MVs is greater than or equal to two
|
||||
*
|
||||
* @param[in] ps_me
|
||||
* Pointer to ilp_me_cands
|
||||
*
|
||||
* @returns One if number of ILP MVs is greater than equal to two and max
|
||||
* difference between them is less than 4 otherwise returns zero
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
static FORCEINLINE bool isvce_check_max_mv_diff_lt_4(ilp_me_cands_t *ps_ilp_me_cands,
|
||||
WORD32 i4_reflist)
|
||||
{
|
||||
UWORD32 i, j;
|
||||
UWORD32 u4_mv_diff_x, u4_mv_diff_y;
|
||||
|
||||
for(i = 1; i < ps_ilp_me_cands->u4_num_ilp_mvs; i++)
|
||||
{
|
||||
for(j = 0; j < i; j++)
|
||||
{
|
||||
if(((ps_ilp_me_cands->ae_pred_mode[i] == ((PRED_MODE_T) i4_reflist)) ||
|
||||
((ps_ilp_me_cands->ae_pred_mode[i] == BI))) &&
|
||||
((ps_ilp_me_cands->ae_pred_mode[j] == ((PRED_MODE_T) i4_reflist)) ||
|
||||
((ps_ilp_me_cands->ae_pred_mode[j] == BI))))
|
||||
{
|
||||
u4_mv_diff_x = ABS(ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv.i2_mvx -
|
||||
ps_ilp_me_cands->as_mv[j][i4_reflist].s_mv.i2_mvx);
|
||||
|
||||
u4_mv_diff_y = ABS(ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv.i2_mvy -
|
||||
ps_ilp_me_cands->as_mv[j][i4_reflist].s_mv.i2_mvy);
|
||||
|
||||
if(u4_mv_diff_x >= 4 || u4_mv_diff_y >= 4)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
116
encoder/svc/isvce_interface_structs.h
Normal file
116
encoder/svc/isvce_interface_structs.h
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_interface_structs.h
|
||||
*
|
||||
* @brief
|
||||
* Contains struct definition used for interface objects such as input,
|
||||
* output, and rec
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_INTERFACE_STRUCTS_H_
|
||||
#define _ISVCE_INTERFACE_STRUCTS_H_
|
||||
|
||||
#include "isvc_structs.h"
|
||||
|
||||
typedef struct isvce_raw_inp_buf_t
|
||||
{
|
||||
/** Descriptor of raw buffer */
|
||||
iv_raw_buf_t s_raw_buf;
|
||||
|
||||
/** Lower 32bits of time stamp corresponding to the above buffer */
|
||||
UWORD32 u4_timestamp_low;
|
||||
|
||||
/** Upper 32bits of time stamp corresponding to the above buffer */
|
||||
UWORD32 u4_timestamp_high;
|
||||
|
||||
/** Flag to indicate if the current buffer is last buffer */
|
||||
UWORD32 u4_is_last;
|
||||
|
||||
/** Flag to indicate if mb info is sent along with input buffer */
|
||||
UWORD32 u4_mb_info_type;
|
||||
|
||||
/** Flag to indicate the size of mb info structure */
|
||||
UWORD32 u4_mb_info_size;
|
||||
|
||||
/** Buffer containing mb info if isvce_mb_info_type is non-zero */
|
||||
void *pv_mb_info;
|
||||
|
||||
/** Flag to indicate if pic info is sent along with input buffer */
|
||||
UWORD32 u4_pic_info_type;
|
||||
|
||||
/** Buffer containing pic info if isvce_mb_info_type is non-zero */
|
||||
void *pv_pic_info;
|
||||
|
||||
/** SEI CCV params flag */
|
||||
UWORD8 u1_sei_ccv_params_present_flag;
|
||||
|
||||
/** SEI CCV params info */
|
||||
sei_ccv_params_t s_sei_ccv;
|
||||
|
||||
} isvce_raw_inp_buf_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/** Descriptor of bitstream buffer */
|
||||
iv_bits_buf_t as_bits_buf[MAX_NUM_SPATIAL_LAYERS];
|
||||
|
||||
/** Lower 32bits of time stamp corresponding to the above buffer */
|
||||
UWORD32 u4_timestamp_low;
|
||||
|
||||
/** Upper 32bits of time stamp corresponding to the above buffer */
|
||||
UWORD32 u4_timestamp_high;
|
||||
|
||||
/** Flag to indicate if the current buffer is last buffer */
|
||||
UWORD32 u4_is_last;
|
||||
|
||||
} isvce_out_buf_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/** Descriptor of picture buffer */
|
||||
svc_au_buf_t s_pic_buf;
|
||||
|
||||
/** Lower 32bits of time stamp corresponding to the above buffer */
|
||||
UWORD32 u4_timestamp_low;
|
||||
|
||||
/** Upper 32bits of time stamp corresponding to the above buffer */
|
||||
UWORD32 u4_timestamp_high;
|
||||
|
||||
/** Flag to indicate if the current buffer is last buffer */
|
||||
UWORD32 u4_is_last;
|
||||
|
||||
/** Picture count corresponding to current picture */
|
||||
WORD32 i4_pic_cnt;
|
||||
|
||||
} isvce_rec_buf_t;
|
||||
|
||||
#endif
|
||||
2334
encoder/svc/isvce_intra_modes_eval.c
Normal file
2334
encoder/svc/isvce_intra_modes_eval.c
Normal file
File diff suppressed because it is too large
Load diff
361
encoder/svc/isvce_intra_modes_eval.h
Normal file
361
encoder/svc/isvce_intra_modes_eval.h
Normal file
|
|
@ -0,0 +1,361 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_intra_modes_eval.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains declarations of routines that perform rate distortion
|
||||
* analysis on a macroblock if coded as intra.
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_INTRA_MODES_EVAL_H_
|
||||
#define _ISVCE_INTRA_MODES_EVAL_H_
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* derivation process for subblock/partition availability
|
||||
*
|
||||
* @par Description
|
||||
* Calculates the availability of the left, top, topright and topleft subblock
|
||||
* or partitions.
|
||||
*
|
||||
* @param[in] s_ngbr_avbl
|
||||
* pointer to the neighbor availability of the current macroblock
|
||||
*
|
||||
* @param[in] i1_pel_pos_x
|
||||
* column position of the pel wrt the current block
|
||||
*
|
||||
* @param[in] i1_pel_pos_y
|
||||
* row position of the pel wrt the current block
|
||||
*
|
||||
* @remarks Assumptions: before calling this function it is assumed that
|
||||
* the neighbor availability of the current macroblock is already derived.
|
||||
* Based on table 6-3 of H264 specification
|
||||
*
|
||||
* @return availability status (yes or no)
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
UWORD8 isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *s_ngbr_avbl, WORD8 i1_pel_pos_x,
|
||||
WORD8 i1_pel_pos_y);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* evaluate best intra 16x16 mode (rate distortion opt off)
|
||||
*
|
||||
* @par Description
|
||||
* This function evaluates all the possible intra 16x16 modes and finds the mode
|
||||
* that best represents the macro-block (least distortion) and occupies fewer
|
||||
* bits in the bit-stream.
|
||||
*
|
||||
* @param[in] ps_proc_ctxt
|
||||
* pointer to process context (handle)
|
||||
*
|
||||
* @remarks
|
||||
* Ideally the cost of encoding a macroblock is calculated as
|
||||
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
|
||||
* input block and the reconstructed block and rate is the number of bits taken
|
||||
* to place the macroblock in the bit-stream. In this routine the rate does not
|
||||
* exactly point to the total number of bits it takes, rather it points to header
|
||||
* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
|
||||
* and residual bits fall in to texture bits the number of bits taken to encoding
|
||||
* mbtype is considered as rate, we compute cost. Further we will approximate
|
||||
* the distortion as the deviation b/w input and the predicted block as opposed
|
||||
* to input and reconstructed block.
|
||||
*
|
||||
* NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
|
||||
* the SAD and cost are one and the same.
|
||||
*
|
||||
* @return none
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* evaluate best intra 8x8 mode (rate distortion opt off)
|
||||
*
|
||||
* @par Description
|
||||
* This function evaluates all the possible intra 8x8 modes and finds the mode
|
||||
* that best represents the macro-block (least distortion) and occupies fewer
|
||||
* bits in the bit-stream.
|
||||
*
|
||||
* @param[in] ps_proc_ctxt
|
||||
* pointer to proc ctxt
|
||||
*
|
||||
* @remarks Ideally the cost of encoding a macroblock is calculated as
|
||||
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
|
||||
* input block and the reconstructed block and rate is the number of bits taken
|
||||
* to place the macroblock in the bit-stream. In this routine the rate does not
|
||||
* exactly point to the total number of bits it takes, rather it points to header
|
||||
* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
|
||||
* and residual bits fall in to texture bits the number of bits taken to encoding
|
||||
* mbtype is considered as rate, we compute cost. Further we will approximate
|
||||
* the distortion as the deviation b/w input and the predicted block as opposed
|
||||
* to input and reconstructed block.
|
||||
*
|
||||
* NOTE: TODO: This function needs to be tested
|
||||
*
|
||||
* @return none
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* evaluate best intra 4x4 mode (rate distortion opt on)
|
||||
*
|
||||
* @par Description
|
||||
* This function evaluates all the possible intra 4x4 modes and finds the mode
|
||||
* that best represents the macro-block (least distortion) and occupies fewer
|
||||
* bits in the bit-stream.
|
||||
*
|
||||
* @param[in] ps_proc_ctxt
|
||||
* pointer to proc ctxt
|
||||
*
|
||||
* @remarks
|
||||
* Ideally the cost of encoding a macroblock is calculated as
|
||||
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
|
||||
* input block and the reconstructed block and rate is the number of bits taken
|
||||
* to place the macroblock in the bit-stream. In this routine the rate does not
|
||||
* exactly point to the total number of bits it takes, rather it points to header
|
||||
* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
|
||||
* and residual bits fall in to texture bits the number of bits taken to encoding
|
||||
* mbtype is considered as rate, we compute cost. Further we will approximate
|
||||
* the distortion as the deviation b/w input and the predicted block as opposed
|
||||
* to input and reconstructed block.
|
||||
*
|
||||
* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
|
||||
* 24*lambda is added to the SAD before comparison with the best SAD for
|
||||
* inter prediction. This is an empirical value to prevent using too many intra
|
||||
* blocks.
|
||||
*
|
||||
* @return none
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t *ps_proc_ctxt);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* evaluate best intra 4x4 mode (rate distortion opt off)
|
||||
*
|
||||
* @par Description
|
||||
* This function evaluates all the possible intra 4x4 modes and finds the mode
|
||||
* that best represents the macro-block (least distortion) and occupies fewer
|
||||
* bits in the bit-stream.
|
||||
*
|
||||
* @param[in] ps_proc_ctxt
|
||||
* pointer to proc ctxt
|
||||
*
|
||||
* @remarks
|
||||
* Ideally the cost of encoding a macroblock is calculated as
|
||||
* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
|
||||
* input block and the reconstructed block and rate is the number of bits taken
|
||||
* to place the macroblock in the bit-stream. In this routine the rate does not
|
||||
* exactly point to the total number of bits it takes, rather it points to header
|
||||
* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
|
||||
* and residual bits fall in to texture bits the number of bits taken to encoding
|
||||
* mbtype is considered as rate, we compute cost. Further we will approximate
|
||||
* the distortion as the deviation b/w input and the predicted block as opposed
|
||||
* to input and reconstructed block.
|
||||
*
|
||||
* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
|
||||
* 24*lambda is added to the SAD before comparison with the best SAD for
|
||||
* inter prediction. This is an empirical value to prevent using too many intra
|
||||
* blocks.
|
||||
*
|
||||
* @return none
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* evaluate best chroma intra 8x8 mode (rate distortion opt off)
|
||||
*
|
||||
* @par Description
|
||||
* This function evaluates all the possible chroma intra 8x8 modes and finds
|
||||
* the mode that best represents the macroblock (least distortion) and occupies
|
||||
* fewer bits in the bitstream.
|
||||
*
|
||||
* @param[in] ps_proc_ctxt
|
||||
* pointer to macroblock context (handle)
|
||||
*
|
||||
* @remarks
|
||||
* For chroma best intra pred mode is calculated based only on SAD
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(
|
||||
isvce_process_ctxt_t *ps_proc_ctxt);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
|
||||
* prediction.
|
||||
*
|
||||
* @par Description
|
||||
* This function evaluates first three 16x16 modes and compute corresponding sad
|
||||
* and return the buffer predicted with best mode.
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* UWORD8 pointer to the source
|
||||
*
|
||||
* @param[in] pu1_ngbr_pels_i16
|
||||
* UWORD8 pointer to neighbouring pels
|
||||
*
|
||||
* @param[out] pu1_dst
|
||||
* UWORD8 pointer to the destination
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* integer source stride
|
||||
*
|
||||
* @param[in] dst_strd
|
||||
* integer destination stride
|
||||
*
|
||||
* @param[in] u4_n_avblty
|
||||
* availability of neighbouring pixels
|
||||
*
|
||||
* @param[out] u4_intra_mode
|
||||
* Pointer to the variable in which best mode is returned
|
||||
*
|
||||
* @param[out] pu4_sadmin
|
||||
* Pointer to the variable in which minimum sad is returned
|
||||
*
|
||||
* @param[in] u4_valid_intra_modes
|
||||
* Says what all modes are valid
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
/*
 * Function type shared by the 16x16 luma and chroma intra mode evaluators
 * declared below (C, a9q, av8 and ssse3 variants).
 * u4_intra_mode and pu4_sadmin are pointers that receive the results.
 * NOTE(review): u4_n_avblty and pu4_sadmin carry u4_/pu4_ names but are
 * declared with the signed WORD32 type; confirm against the implementations
 * before changing either the names or the types.
 */
typedef void isvce_evaluate_intra_modes_ft(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16,
|
||||
UWORD8 *pu1_dst, UWORD32 src_strd, UWORD32 dst_strd,
|
||||
WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
|
||||
WORD32 *pu4_sadmin, UWORD32 u4_valid_intra_modes);
|
||||
|
||||
isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes;
|
||||
isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes;
|
||||
|
||||
/* assembly */
|
||||
isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_a9q;
|
||||
isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_a9q;
|
||||
|
||||
isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_av8;
|
||||
isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_av8;
|
||||
|
||||
/* x86 intrinsics */
|
||||
isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_ssse3;
|
||||
isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_ssse3;
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* Evaluate best intra 4x4 mode and perform prediction.
|
||||
*
|
||||
* @par Description
|
||||
* This function evaluates 4x4 modes and compute corresponding sad
|
||||
* and return the buffer predicted with best mode.
|
||||
*
|
||||
* @param[in] pu1_src
|
||||
* UWORD8 pointer to the source
|
||||
*
|
||||
* @param[in] pu1_ngbr_pels
|
||||
* UWORD8 pointer to neighbouring pels
|
||||
*
|
||||
* @param[out] pu1_dst
|
||||
* UWORD8 pointer to the destination
|
||||
*
|
||||
* @param[in] src_strd
|
||||
* integer source stride
|
||||
*
|
||||
* @param[in] dst_strd
|
||||
* integer destination stride
|
||||
*
|
||||
* @param[in] u4_n_avblty
|
||||
* availability of neighbouring pixels
|
||||
*
|
||||
* @param[out] u4_intra_mode
|
||||
* Pointer to the variable in which best mode is returned
|
||||
*
|
||||
* @param[out] pu4_sadmin
|
||||
* Pointer to the variable in which minimum cost is returned
|
||||
*
|
||||
* @param[in] u4_valid_intra_modes
|
||||
* Says what all modes are valid
|
||||
*
|
||||
* @param[in] u4_lambda
|
||||
* Lambda value for computing cost from SAD
|
||||
*
|
||||
* @param[in] u4_predictd_mode
|
||||
* Predicted mode for cost computation
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
/*
 * Function type shared by the intra 4x4 mode evaluators declared below
 * (C, ssse3, a9q and av8 variants). Compared with the 16x16/chroma evaluator
 * type it takes two extra inputs, u4_lambda and u4_predictd_mode.
 * u4_intra_mode and pu4_sadmin are pointers that receive the results.
 * NOTE(review): u4_n_avblty and pu4_sadmin carry u4_/pu4_ names but are
 * declared with the signed WORD32 type; confirm against the implementations
 * before changing either the names or the types.
 */
typedef void isvce_evaluate_intra_4x4_modes_ft(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels,
|
||||
UWORD8 *pu1_dst, UWORD32 src_strd, UWORD32 dst_strd,
|
||||
WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
|
||||
WORD32 *pu4_sadmin, UWORD32 u4_valid_intra_modes,
|
||||
UWORD32 u4_lambda, UWORD32 u4_predictd_mode);
|
||||
|
||||
isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes;
|
||||
|
||||
/* x86 intrinsics */
|
||||
isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_ssse3;
|
||||
|
||||
/* assembly */
|
||||
isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_a9q;
|
||||
isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_av8;
|
||||
|
||||
#endif
|
||||
480
encoder/svc/isvce_mc.c
Normal file
480
encoder/svc/isvce_mc.c
Normal file
|
|
@ -0,0 +1,480 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_mc.c
|
||||
*
|
||||
* @brief
|
||||
* Contains definition of functions for motion compensation
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_motion_comp_luma()
|
||||
* - isvce_motion_comp_chroma()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* System include files */
|
||||
#include <stdio.h>
|
||||
|
||||
/* User include files */
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
#include "ime_defs.h"
|
||||
#include "ime_structs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_inter_pred_filters.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
#include "ih264_padding.h"
|
||||
#include "ih264_intra_pred_filters.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "ih264e_error.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "isvce_rate_control.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvce_mc.h"
|
||||
#include "ih264e_half_pel.h"
|
||||
#include "isvce_ibl_eval.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Definitions */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* performs motion compensation for a luma mb for the given mv.
|
||||
*
|
||||
* @par Description
|
||||
* This routine performs motion compensation of an inter mb. When the inter
|
||||
* mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
|
||||
* to pred buffer. In this case the function returns pointer and stride of the
|
||||
* ref. buffer and this info is used in place of pred buffer else where.
|
||||
* In other cases, the pred buffer is populated via copy / filtering + copy
|
||||
* (q pel cases) and returned.
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* pointer to current proc ctxt
|
||||
*
|
||||
* @return none
|
||||
*
|
||||
* @remarks Assumes half pel buffers for the entire frame are populated.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_motion_comp_luma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred)
|
||||
{
|
||||
/* codec context */
|
||||
isvce_codec_t *ps_codec = ps_proc->ps_codec;
|
||||
|
||||
/* me ctxt */
|
||||
isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
|
||||
|
||||
isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
|
||||
inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
|
||||
|
||||
/* Pointer to the structure having motion vectors, size and position of curr
|
||||
* partitions */
|
||||
isvce_enc_pu_t *ps_curr_pu;
|
||||
|
||||
/* pointers to full pel, half pel x, half pel y, half pel xy reference buffer
|
||||
*/
|
||||
UWORD8 *pu1_ref[4];
|
||||
|
||||
/* pred buffer ptr */
|
||||
UWORD8 *pu1_pred;
|
||||
|
||||
/* strides of full pel, half pel x, half pel y, half pel xy reference buffer
|
||||
*/
|
||||
WORD32 i4_ref_strd[4];
|
||||
|
||||
/* pred buffer stride */
|
||||
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
|
||||
|
||||
/* full pel motion vectors */
|
||||
WORD32 u4_mv_x_full, u4_mv_y_full;
|
||||
|
||||
/* half pel motion vectors */
|
||||
WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
|
||||
|
||||
/* quarter pel motion vectors */
|
||||
WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
|
||||
|
||||
/* width & height of the partition */
|
||||
UWORD32 wd, ht;
|
||||
|
||||
/* partition idx */
|
||||
UWORD32 u4_num_prtn;
|
||||
|
||||
/* half / qpel coefficient */
|
||||
UWORD32 u4_subpel_factor;
|
||||
|
||||
/* BIPRED Flag */
|
||||
WORD32 i4_bipred_flag;
|
||||
|
||||
/* temp var */
|
||||
UWORD32 u4_lkup_idx1;
|
||||
|
||||
if((ps_proc->ps_mb_info->u2_mb_type == BASE_MODE) && ps_proc->ps_mb_info->u1_is_intra)
|
||||
{
|
||||
svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt;
|
||||
|
||||
ps_pred->pv_data =
|
||||
(UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y]
|
||||
.pv_data);
|
||||
ps_pred->i4_data_stride =
|
||||
ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y].i4_data_stride;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* Init */
|
||||
i4_ref_strd[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].i4_data_stride;
|
||||
|
||||
i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] = ps_me_ctxt->u4_subpel_buf_strd;
|
||||
|
||||
for(u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
|
||||
{
|
||||
mv_t *ps_curr_mv;
|
||||
|
||||
/* update ptr to curr partition */
|
||||
ps_curr_pu = ps_proc->ps_mb_info->as_pu + u4_num_prtn;
|
||||
|
||||
/* Set no no bipred */
|
||||
i4_bipred_flag = 0;
|
||||
|
||||
switch(ps_curr_pu->u1_pred_mode)
|
||||
{
|
||||
case PRED_L0:
|
||||
ps_curr_mv = &ps_curr_pu->as_me_info[0].s_mv;
|
||||
pu1_ref[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data;
|
||||
break;
|
||||
|
||||
case PRED_L1:
|
||||
ps_curr_mv = &ps_curr_pu->as_me_info[1].s_mv;
|
||||
pu1_ref[0] = ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data;
|
||||
break;
|
||||
|
||||
case PRED_BI:
|
||||
/*
|
||||
* In case of PRED_BI, we only need to ensure that
|
||||
* the reference buffer that gets selected is
|
||||
* ps_proc->pu1_best_subpel_buf
|
||||
*/
|
||||
|
||||
/* Dummy */
|
||||
ps_curr_mv = &ps_curr_pu->as_me_info[0].s_mv;
|
||||
pu1_ref[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data;
|
||||
|
||||
i4_bipred_flag = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
ps_curr_mv = &ps_curr_pu->as_me_info[0].s_mv;
|
||||
pu1_ref[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data;
|
||||
break;
|
||||
}
|
||||
|
||||
/* get full pel mv's (full pel units) */
|
||||
u4_mv_x_full = ps_curr_mv->i2_mvx >> 2;
|
||||
u4_mv_y_full = ps_curr_mv->i2_mvy >> 2;
|
||||
|
||||
/* get half pel mv's */
|
||||
u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
|
||||
u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
|
||||
|
||||
/* get quarter pel mv's */
|
||||
u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
|
||||
u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
|
||||
|
||||
/* width and height of partition */
|
||||
wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 2;
|
||||
ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 2;
|
||||
|
||||
/* decision ? qpel/hpel, fpel */
|
||||
u4_subpel_factor =
|
||||
(u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2) + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel);
|
||||
|
||||
/* Move ref to position given by MV */
|
||||
pu1_ref[0] += ((u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full);
|
||||
|
||||
/* Sub pel ptrs/ Biperd pointers init */
|
||||
pu1_ref[1] = ps_proc->pu1_best_subpel_buf;
|
||||
i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd;
|
||||
|
||||
/* update pred buff ptr */
|
||||
pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
|
||||
4 * ps_curr_pu->u1_pos_x_in_4x4;
|
||||
|
||||
/* u4_lkup_idx1 will be non zero for half pel and bipred */
|
||||
u4_lkup_idx1 = ((u4_subpel_factor >> 2) != 0) || i4_bipred_flag;
|
||||
|
||||
{
|
||||
/********************************************************************/
|
||||
/* if the block is P16x16 MB and mv are not quarter pel motion */
|
||||
/* vectors, there is no need to copy 16x16 unit from reference frame*/
|
||||
/* to pred buffer. We might as well send the reference frame buffer */
|
||||
/* pointer as pred buffer (ofc with updated stride) to fwd transform*/
|
||||
/* and inverse transform unit. */
|
||||
/********************************************************************/
|
||||
if(ps_proc->u4_num_sub_partitions == 1)
|
||||
{
|
||||
ps_pred->pv_data = pu1_ref[u4_lkup_idx1];
|
||||
ps_pred->i4_data_stride = i4_ref_strd[u4_lkup_idx1];
|
||||
}
|
||||
/*
|
||||
* Copying half pel or full pel to prediction buffer
|
||||
* Currently ps_proc->u4_num_sub_partitions will always be 1 as we only
|
||||
* support 16x16 in P mbs
|
||||
*/
|
||||
else
|
||||
{
|
||||
ps_inter_pred_fxns->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1], pu1_pred,
|
||||
i4_ref_strd[u4_lkup_idx1], i4_pred_strd,
|
||||
ht, wd, NULL, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* performs motion compensation for chroma mb
|
||||
*
|
||||
* @par Description
|
||||
* Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
|
||||
* according to the motion vectors given
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* pointer to current proc ctxt
|
||||
*
|
||||
* @return none
|
||||
*
|
||||
* @remarks Assumes half pel and quarter pel buffers for the entire frame are
|
||||
* populated.
|
||||
******************************************************************************
|
||||
*/
|
||||
void isvce_motion_comp_chroma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred)
|
||||
{
|
||||
/* codec context */
|
||||
isvce_codec_t *ps_codec = ps_proc->ps_codec;
|
||||
isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
|
||||
inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
|
||||
|
||||
/* Pointer to the structure having motion vectors, size and position of curr
|
||||
* partitions */
|
||||
isvce_enc_pu_t *ps_curr_pu;
|
||||
|
||||
/* pointers to full pel, half pel x, half pel y, half pel xy reference buffer
|
||||
*/
|
||||
UWORD8 *pu1_ref;
|
||||
|
||||
/* pred buffer ptr */
|
||||
UWORD8 *pu1_pred;
|
||||
|
||||
/* strides of full pel reference buffer */
|
||||
WORD32 i4_ref_strd;
|
||||
|
||||
/* pred buffer stride */
|
||||
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
|
||||
|
||||
/* full pel motion vectors */
|
||||
WORD32 u4_mv_x_full, u4_mv_y_full;
|
||||
|
||||
/* half pel motion vectors */
|
||||
WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
|
||||
|
||||
/* quarter pel motion vectors */
|
||||
WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
|
||||
|
||||
/* width & height of the partition */
|
||||
UWORD32 wd, ht;
|
||||
|
||||
/* partition idx */
|
||||
UWORD32 u4_num_prtn;
|
||||
|
||||
WORD32 u4_mv_x;
|
||||
WORD32 u4_mv_y;
|
||||
UWORD8 u1_dx, u1_dy;
|
||||
|
||||
ASSERT(ps_proc->u4_num_sub_partitions <= ENC_MAX_PU_IN_MB);
|
||||
|
||||
if((ps_proc->ps_mb_info->u2_mb_type == BASE_MODE) && ps_proc->ps_mb_info->u1_is_intra)
|
||||
{
|
||||
svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt;
|
||||
|
||||
ps_pred->pv_data =
|
||||
(UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[UV]
|
||||
.pv_data);
|
||||
ps_pred->i4_data_stride =
|
||||
ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[UV]
|
||||
.i4_data_stride;
|
||||
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_pred->pv_data = ps_proc->pu1_pred_mb;
|
||||
ps_pred->i4_data_stride = ps_proc->i4_pred_strd;
|
||||
}
|
||||
|
||||
for(u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
|
||||
{
|
||||
mv_t *ps_curr_mv;
|
||||
|
||||
ps_curr_pu = ps_proc->ps_mb_info->as_pu + u4_num_prtn;
|
||||
|
||||
if(ps_curr_pu->u1_pred_mode != BI)
|
||||
{
|
||||
ps_curr_mv = &ps_curr_pu->as_me_info[ps_curr_pu->u1_pred_mode].s_mv;
|
||||
pu1_ref =
|
||||
ps_proc->as_ref_buf_props[ps_curr_pu->u1_pred_mode].as_component_bufs[1].pv_data;
|
||||
i4_ref_strd = ps_proc->as_ref_buf_props[ps_curr_pu->u1_pred_mode]
|
||||
.as_component_bufs[1]
|
||||
.i4_data_stride;
|
||||
|
||||
u4_mv_x = ps_curr_mv->i2_mvx >> 3;
|
||||
u4_mv_y = ps_curr_mv->i2_mvy >> 3;
|
||||
|
||||
/* corresponds to full pel motion vector in luma, but in chroma
|
||||
* corresponds to pel formed wiith dx, dy =4 */
|
||||
u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
|
||||
u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
|
||||
|
||||
/* get half pel mv's */
|
||||
u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
|
||||
u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
|
||||
|
||||
/* get quarter pel mv's */
|
||||
u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
|
||||
u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
|
||||
|
||||
/* width and height of sub macro block */
|
||||
wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 1;
|
||||
ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 1;
|
||||
|
||||
/* move the pointers so that they point to the motion compensated
|
||||
* locations */
|
||||
pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
|
||||
|
||||
pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
|
||||
2 * ps_curr_pu->u1_pos_x_in_4x4;
|
||||
|
||||
u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
|
||||
u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
|
||||
|
||||
/* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with
|
||||
* separate functions for better performance
|
||||
*
|
||||
* isvc_inter_pred_chroma_dx_zero_a9q
|
||||
* and
|
||||
* isvc_inter_pred_chroma_dy_zero_a9q
|
||||
*/
|
||||
|
||||
ps_inter_pred_fxns->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, i4_pred_strd,
|
||||
u1_dx, u1_dy, ht, wd);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* We need to interpolate the L0 and L1 ref pics with the chorma MV
|
||||
* then use them to average for bilinrar interpred
|
||||
*/
|
||||
WORD32 i4_predmode;
|
||||
UWORD8 *pu1_ref_buf[2];
|
||||
|
||||
/* Temporary buffers to store the interpolated value from L0 and L1 */
|
||||
pu1_ref_buf[L0] = ps_proc->apu1_subpel_buffs[0];
|
||||
pu1_ref_buf[L1] = ps_proc->apu1_subpel_buffs[1];
|
||||
|
||||
for(i4_predmode = 0; i4_predmode < BI; i4_predmode++)
|
||||
{
|
||||
ps_curr_mv = &ps_curr_pu->as_me_info[i4_predmode].s_mv;
|
||||
pu1_ref = ps_proc->as_ref_buf_props[i4_predmode].as_component_bufs[1].pv_data;
|
||||
i4_ref_strd =
|
||||
ps_proc->as_ref_buf_props[i4_predmode].as_component_bufs[1].i4_data_stride;
|
||||
|
||||
u4_mv_x = ps_curr_mv->i2_mvx >> 3;
|
||||
u4_mv_y = ps_curr_mv->i2_mvy >> 3;
|
||||
|
||||
/*
|
||||
* corresponds to full pel motion vector in luma, but in chroma
|
||||
* corresponds to pel formed wiith dx, dy =4
|
||||
*/
|
||||
u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
|
||||
u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
|
||||
|
||||
/* get half pel mv's */
|
||||
u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
|
||||
u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
|
||||
|
||||
/* get quarter pel mv's */
|
||||
u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
|
||||
u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
|
||||
|
||||
/* width and height of sub macro block */
|
||||
wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 1;
|
||||
ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 1;
|
||||
|
||||
/* move the pointers so that they point to the motion compensated
|
||||
* locations */
|
||||
pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
|
||||
|
||||
pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
|
||||
2 * ps_curr_pu->u1_pos_x_in_4x4;
|
||||
|
||||
u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
|
||||
u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
|
||||
|
||||
ps_inter_pred_fxns->pf_inter_pred_chroma(
|
||||
pu1_ref, pu1_ref_buf[i4_predmode], i4_ref_strd, MB_SIZE, u1_dx, u1_dy, ht, wd);
|
||||
}
|
||||
|
||||
ps_inter_pred_fxns->pf_inter_pred_luma_bilinear(pu1_ref_buf[L0], pu1_ref_buf[L1],
|
||||
pu1_pred, MB_SIZE, MB_SIZE,
|
||||
i4_pred_strd, MB_SIZE >> 1, MB_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
87
encoder/svc/isvce_mc.h
Normal file
87
encoder/svc/isvce_mc.h
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_mc.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains declarations of routines that perform motion compensation
|
||||
* of luma and chroma macroblocks.
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#ifndef _ISVCE_MC_H_
|
||||
#define _ISVCE_MC_H_
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* performs motion compensation for a luma mb for the given mv.
|
||||
*
|
||||
* @par Description
|
||||
* This routine performs motion compensation of an inter mb. When the inter
|
||||
* mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
|
||||
* to pred buffer. In this case the function returns pointer and stride of the
|
||||
* ref. buffer and this info is used in place of pred buffer else where.
|
||||
* In other cases, the pred buffer is populated via copy / filtering + copy
|
||||
* (q pel cases) and returned.
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
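* pointer to current proc ctxt
|
||||
*
|
||||
* @param[out] ps_pred
|
||||
* receives the data pointer and stride of the luma prediction to be used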
|
||||
*
|
||||
* @return none
|
||||
*
|
||||
* @remarks Assumes half pel buffers for the entire frame are populated.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
extern void isvce_motion_comp_luma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred);
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* performs motion compensation for chroma mb
|
||||
*
|
||||
* @par Description
|
||||
* Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
|
||||
* according to the motion vectors given
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
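* pointer to current proc ctxt
|
||||
*
|
||||
* @param[out] ps_pred
|
||||
* receives the data pointer and stride of the chroma prediction to be used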
|
||||
*
|
||||
* @return none
|
||||
*
|
||||
* @remarks Assumes half pel and quarter pel buffers for the entire frame are
|
||||
* populated.
|
||||
******************************************************************************
|
||||
*/
|
||||
extern void isvce_motion_comp_chroma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred);
|
||||
|
||||
#endif
|
||||
2924
encoder/svc/isvce_me.c
Normal file
2924
encoder/svc/isvce_me.c
Normal file
File diff suppressed because it is too large
Load diff
381
encoder/svc/isvce_me.h
Normal file
381
encoder/svc/isvce_me.h
Normal file
|
|
@ -0,0 +1,381 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_me.h
|
||||
*
|
||||
* @brief
|
||||
* Contains macros and function declarations for motion estimation
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_ME_H_
|
||||
#define _ISVCE_ME_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
|
||||
#include "isvce_structs.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Constant Macros */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @brief Skip Bias value for P slice
|
||||
******************************************************************************
|
||||
*/
|
||||
#define SKIP_BIAS_P 0
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @brief Skip Bias value for B slice
|
||||
******************************************************************************
|
||||
*/
|
||||
#define SKIP_BIAS_B 0
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Macros */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @brief compute median of 3 elements (a, b, c) and store the output
|
||||
* in to result. This is used for mv prediction
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/*
 * Stores the median of a, b and c in result.
 * Every argument is parenthesised so that an expression argument (for
 * example `x & m`) is compared and assigned as a whole instead of being
 * re-associated with the comparison operators of the macro.
 * a, b and c can each be evaluated more than once, so they must be free of
 * side effects.
 * NOTE(review): deliberately left as a bare if/else statement rather than a
 * do { } while(0) wrapper, so that call sites not visible here keep compiling
 * unchanged; as a consequence it must not be used as the unbraced body of an
 * if that has an else.
 */
#define MEDIAN(a, b, c, result) \
    if((a) > (b))               \
    {                           \
        if((b) > (c))           \
            (result) = (b);     \
        else                    \
        {                       \
            if((a) > (c))       \
                (result) = (c); \
            else                \
                (result) = (a); \
        }                       \
    }                           \
    else                        \
    {                           \
        if((c) > (b))           \
            (result) = (b);     \
        else                    \
        {                       \
            if((c) > (a))       \
                (result) = (c); \
            else                \
                (result) = (a); \
        }                       \
    }
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Extern Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function populates the length of the codewords for motion vectors in
|
||||
* the range (-search range, search range) in pixels
|
||||
*
|
||||
* @param[in] ps_me
|
||||
* Pointer to me ctxt
|
||||
*
|
||||
* @param[out] pu1_mv_bits
|
||||
* length of the codeword for all mv's
|
||||
*
|
||||
* @remarks The length of the code words are derived from signed exponential
|
||||
* Golomb codes.
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_mv_bits(isvce_me_ctxt_t *ps_me);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief The function computes the parameters for a P skip MB
|
||||
*
|
||||
* @par Description:
|
||||
* The function computes the parameters for a P skip MB
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context
|
||||
*
|
||||
* @param[in] u4_for_me
|
||||
* Flag to indicate the purpose of computing skip
|
||||
*
|
||||
* @param[out] ps_pred_mv
|
||||
* Flag to indicate the current active reference list
|
||||
*
|
||||
* @returns
|
||||
* 1) Updates skip MV in proc
|
||||
* 2) Returns if the current MB can be coded as skip or not
|
||||
*
|
||||
* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
|
||||
* specification.
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
FT_FIND_SKIP_PARAMS isvce_find_pskip_params;
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief The function computes the parameters for a P skip MB
|
||||
*
|
||||
* @par Description:
|
||||
* The function computes the parameters for a P skip MB
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context
|
||||
*
|
||||
* @param[in] u4_for_me
|
||||
* Flag to indicate the purpose of computing skip
|
||||
*
|
||||
* @param[out] ps_pred_mv
|
||||
* Flag to indicate the current active reference list
|
||||
*
|
||||
* @returns
|
||||
* 1) Updates skip MV in proc
|
||||
* 2) Returns if the current MB can be coded as skip or not
|
||||
*
|
||||
* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
|
||||
* specification.
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
FT_FIND_SKIP_PARAMS isvce_find_pskip_params_me;
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief The function computes the parameters for a B skip MB
|
||||
*
|
||||
* @par Description:
|
||||
* The function computes the parameters for a B skip MB
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context
|
||||
*
|
||||
* @param[in] u4_for_me
|
||||
* Flag to indicate the purpose of computing skip
|
||||
*
|
||||
* @param[out] ps_pred_mv
|
||||
* Flag to indicate the current active reference list
|
||||
*
|
||||
* @returns
|
||||
* 1) Updates skip MV in proc
|
||||
* 2) Returns if the current MB can be coded as skip or not
|
||||
*
|
||||
* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
|
||||
* specification.
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
FT_FIND_SKIP_PARAMS isvce_find_bskip_params;
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief The function computes the parameters for a B skip MB
|
||||
*
|
||||
* @par Description:
|
||||
* The function computes the parameters for a B skip MB
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context
|
||||
*
|
||||
* @param[in] u4_for_me
|
||||
* Flag to indicate the purpose of computing skip
|
||||
*
|
||||
* @param[out] ps_pred_mv
|
||||
* Flag to indicate the current active reference list
|
||||
*
|
||||
* @returns
|
||||
* 1) Updates skip MV in proc
|
||||
* 2) The type of SKIP [L0/L1/BI]
|
||||
*
|
||||
* @remarks
|
||||
*******************************************************************************
|
||||
*/
|
||||
FT_FIND_SKIP_PARAMS isvce_find_bskip_params_me;
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief motion vector predictor
|
||||
*
|
||||
* @par Description:
|
||||
* The routine calculates the motion vector predictor for a given block,
|
||||
* given the candidate MV predictors.
|
||||
*
|
||||
* @param[in] ps_left_mb_pu
|
||||
* pointer to left mb motion vector info
|
||||
*
|
||||
* @param[in] ps_top_row_pu
|
||||
* pointer to top & top right mb motion vector info
|
||||
*
|
||||
* @param[out] ps_pred_mv
|
||||
* pointer to candidate predictors for the current block
|
||||
*
|
||||
* @returns The x & y components of the MV predictor.
|
||||
*
|
||||
* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
|
||||
* specification.
|
||||
* Assumptions : 1. Assumes Only partition of size 16x16
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_get_mv_predictor(isvce_enc_pu_mv_t *ps_pred_mv, isvce_enc_pu_mv_t *ps_neig_mv,
|
||||
WORD32 pred_algo);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief This function evaluates ME for 2 reference lists
|
||||
*
|
||||
* @par Description:
|
||||
* It evaluates skip, full-pel and half-pel and assigns the correct MV in proc
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context corresponding to the job
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
FT_ME_ALGORITHM isvce_compute_me_multi_reflist;
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief This function evaluates ME for single reflist [Pred L0]
|
||||
*
|
||||
* @par Description:
|
||||
* It evaluates skip, full-pel and half-pel and assigns the correct MV in proc
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context corresponding to the job
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
FT_ME_ALGORITHM isvce_compute_me_single_reflist;
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief This function initializes me ctxt
|
||||
*
|
||||
* @par Description:
|
||||
* Before dispatching the current job to me thread, the me context associated
|
||||
* with the job is initialized.
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context corresponding to the job
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_init_me(isvce_process_ctxt_t *ps_proc);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief This function performs motion estimation for the current NMB
|
||||
*
|
||||
* @par Description:
|
||||
* Initializes input and output pointers required by the function
|
||||
*isvce_compute_me and calls the function isvce_compute_me in a loop to
|
||||
*process NMBs.
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context corresponding to the job
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_compute_me_nmb(isvce_process_ctxt_t *ps_proc, UWORD32 u4_nmb_count);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief This function performs MV prediction
|
||||
*
|
||||
* @par Description:
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context corresponding to the job
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
* This function will update the MB availability since intra inter decision
|
||||
* should be done before the call
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_mv_pred(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief This function approximates Pred. MV
|
||||
*
|
||||
* @par Description:
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context corresponding to the job
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks none
|
||||
* Motion estimation happens at nmb level. For cost calculations, mv is
|
||||
* approximated using this function
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_mv_pred_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_ref_list);
|
||||
|
||||
#endif
|
||||
191
encoder/svc/isvce_mode_stat_visualiser.c
Normal file
191
encoder/svc/isvce_mode_stat_visualiser.c
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_mode_stat_visualiser.c
|
||||
*
|
||||
* @brief
|
||||
* Contains functions used for synthesising analysis YUV
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include "isvce_defs.h"
|
||||
|
||||
#if ENABLE_MODE_STAT_VISUALISER
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "ih264e_fmt_conv.h"
|
||||
#include "isvce_mode_stat_visualiser.h"
|
||||
|
||||
#define MAX_NUM_MB_MODE_VISUALISATIONS 1
|
||||
|
||||
static const UWORD8 gau1_output_file_path[] = "out.yuv";
|
||||
|
||||
static const double gd_alpha = 0.5;
|
||||
|
||||
static const UWORD8 gau1_colors[MAX_NUM_MB_MODE_VISUALISATIONS][NUM_COMPONENTS] = {
|
||||
/* Red */
|
||||
{81, 90, 240},
|
||||
};
|
||||
|
||||
/**
 * Returns the number of bytes required by the mode stat visualiser: the
 * context struct itself followed by one 8-bit frame made of a full-size luma
 * plane and two quarter-size chroma planes for a u4_wd x u4_ht picture.
 */
UWORD32 isvce_get_msv_ctxt_size(UWORD32 u4_wd, UWORD32 u4_ht)
{
    WORD32 i4_luma_plane_size = u4_wd * u4_ht;
    WORD32 i4_chroma_plane_size = i4_luma_plane_size / 4;
    UWORD32 u4_frame_size =
        (i4_luma_plane_size + i4_chroma_plane_size * 2) * sizeof(UWORD8);

    return sizeof(mode_stat_visualiser_t) + u4_frame_size;
}
|
||||
|
||||
/**
 * Carves the mode stat visualiser context and its planar 4:2:0 frame buffer
 * out of the memory record and opens the output file.
 *
 * Layout inside ps_mem_rec->pv_base: the mode_stat_visualiser_t struct,
 * followed by the Y plane and then the two chroma planes. The chroma planes
 * use a stride of u4_wd / 2.
 *
 * @param[in] ps_codec    Codec context; supplies width/height and receives the
 *                        pointer to the visualiser context.
 * @param[in] ps_mem_rec  Memory record; its base must hold at least
 *                        isvce_get_msv_ctxt_size(u4_wd, u4_ht) bytes.
 *
 * NOTE(review): the result of fopen() is not checked here; callers that can
 * run in environments where "out.yuv" is not writable should confirm that
 * ps_output_file is non-NULL before dumping.
 */
void isvce_msv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec)
{
    mode_stat_visualiser_t *ps_mode_stat_visualiser;
    yuv_buf_props_t *ps_frame_buf;

    WORD32 i;

    UWORD32 u4_wd = ps_codec->s_cfg.u4_wd;
    UWORD32 u4_ht = ps_codec->s_cfg.u4_ht;
    WORD32 i4_num_luma_samples = u4_wd * u4_ht;
    WORD32 i4_num_chroma_samples = i4_num_luma_samples / 4;
    UWORD8 *pu1_buf = ps_mem_rec->pv_base;
    /* Tracks how much of the budgeted memory is left; checked at the end */
    WORD64 i8_alloc_mem_size = isvce_get_msv_ctxt_size(u4_wd, u4_ht);

    ps_mode_stat_visualiser = ps_codec->ps_mode_stat_visualiser =
        (mode_stat_visualiser_t *) pu1_buf;
    pu1_buf += sizeof(ps_mode_stat_visualiser[0]);
    i8_alloc_mem_size -= sizeof(ps_mode_stat_visualiser[0]);

    ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf;

    /* Raw YUV samples are written to this file, so it must be opened in */
    /* binary mode; text mode corrupts the data on platforms that translate */
    /* line endings */
    ps_mode_stat_visualiser->ps_output_file = fopen((const char *) gau1_output_file_path, "wb");

    ps_frame_buf->e_color_format = IV_YUV_420P;
    ps_frame_buf->u1_bit_depth = 8;
    ps_frame_buf->u4_width = u4_wd;
    ps_frame_buf->u4_height = u4_ht;

    for(i = 0; i < NUM_COMPONENTS; i++)
    {
        UWORD8 u1_is_chroma = (((COMPONENT_TYPE) i) != Y);
        UWORD32 u4_buf_size = u1_is_chroma ? i4_num_chroma_samples : i4_num_luma_samples;
        UWORD32 u4_stride = u4_wd >> u1_is_chroma;

        ps_frame_buf->as_component_bufs[i].pv_data = pu1_buf;
        ps_frame_buf->as_component_bufs[i].i4_data_stride = u4_stride;

        pu1_buf += u4_buf_size;
        i8_alloc_mem_size -= u4_buf_size;
    }

    /* The planes must not have consumed more than the computed context size */
    ASSERT(i8_alloc_mem_size >= 0);
}
|
||||
|
||||
/**
 * Releases the resources held by the mode stat visualiser context.
 *
 * Closes the output file if one is open. The handle is cleared afterwards so
 * that a repeated call does not close the same stream twice, and a context
 * whose file could not be opened (NULL handle) is tolerated.
 *
 * @param[in] ps_mode_stat_visualiser  Visualiser context
 */
void isvce_msv_ctxt_delete(mode_stat_visualiser_t *ps_mode_stat_visualiser)
{
    if(NULL != ps_mode_stat_visualiser->ps_output_file)
    {
        fclose(ps_mode_stat_visualiser->ps_output_file);

        ps_mode_stat_visualiser->ps_output_file = NULL;
    }
}
|
||||
|
||||
void isvce_msv_get_input_frame(mode_stat_visualiser_t *ps_mode_stat_visualiser,
|
||||
isvce_inp_buf_t *ps_inp_buf)
|
||||
{
|
||||
svc_params_t *ps_svc_params = &ps_inp_buf->s_svc_params;
|
||||
yuv_buf_props_t *ps_target_layer_yuv_buf =
|
||||
&ps_inp_buf->as_layer_yuv_buf_props[ps_svc_params->u1_num_spatial_layers - 1];
|
||||
yuv_buf_props_t *ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf;
|
||||
|
||||
ASSERT(ps_target_layer_yuv_buf->u4_width == ps_frame_buf->u4_width);
|
||||
ASSERT(ps_target_layer_yuv_buf->u4_height == ps_frame_buf->u4_height);
|
||||
ASSERT(ps_target_layer_yuv_buf->u1_bit_depth == ps_frame_buf->u1_bit_depth);
|
||||
ASSERT(ps_target_layer_yuv_buf->e_color_format == IV_YUV_420SP_UV);
|
||||
ASSERT(ps_frame_buf->u1_bit_depth == IV_YUV_420P);
|
||||
ASSERT(ps_target_layer_yuv_buf->as_component_bufs[U].i4_data_stride ==
|
||||
ps_target_layer_yuv_buf->as_component_bufs[V].i4_data_stride);
|
||||
|
||||
isvce_fmt_conv_420sp_to_420p(
|
||||
ps_target_layer_yuv_buf->as_component_bufs[Y].pv_data,
|
||||
ps_target_layer_yuv_buf->as_component_bufs[UV].pv_data,
|
||||
ps_frame_buf->as_component_bufs[Y].pv_data, ps_frame_buf->as_component_bufs[U].pv_data,
|
||||
ps_frame_buf->as_component_bufs[V].pv_data, ps_frame_buf->u4_width, ps_frame_buf->u4_height,
|
||||
ps_target_layer_yuv_buf->as_component_bufs[Y].i4_data_stride,
|
||||
ps_target_layer_yuv_buf->as_component_bufs[UV].i4_data_stride,
|
||||
ps_frame_buf->as_component_bufs[Y].i4_data_stride,
|
||||
ps_frame_buf->as_component_bufs[U].i4_data_stride, 1, 0);
|
||||
}
|
||||
|
||||
void isvce_msv_set_mode(mode_stat_visualiser_t *ps_mode_stat_visualiser,
|
||||
isvce_mb_info_t *ps_mb_info, coordinates_t *ps_mb_pos)
|
||||
{
|
||||
UWORD32 i, j, k;
|
||||
|
||||
for(i = 0; i < NUM_COMPONENTS; i++)
|
||||
{
|
||||
UWORD8 u1_is_chroma = (((COMPONENT_TYPE) i) != Y);
|
||||
UWORD32 u4_wd = MB_SIZE >> u1_is_chroma;
|
||||
UWORD32 u4_ht = MB_SIZE >> u1_is_chroma;
|
||||
UWORD8 *pu1_buf = ps_mode_stat_visualiser->s_frame_buf.as_component_bufs[i].pv_data;
|
||||
WORD32 i4_stride = ps_mode_stat_visualiser->s_frame_buf.as_component_bufs[i].i4_data_stride;
|
||||
|
||||
pu1_buf += ps_mb_pos->i4_abscissa * u4_wd + ps_mb_pos->i4_ordinate * u4_ht * i4_stride;
|
||||
|
||||
for(j = 0; j < u4_ht; j++)
|
||||
{
|
||||
for(k = 0; k < u4_wd; k++)
|
||||
{
|
||||
if(ps_mb_info->u1_residual_prediction_flag)
|
||||
{
|
||||
pu1_buf[k + j * i4_stride] =
|
||||
(UWORD8) (gd_alpha * gau1_colors[0][i] +
|
||||
(1. - gd_alpha) * pu1_buf[k + j * i4_stride] + 0.5);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Appends the visualiser frame to the output file, one plane after another
 * in component order.
 *
 * Each plane is written as a single contiguous run of width * height bytes,
 * which relies on the plane stride being equal to its width (asserted).
 *
 * @param[in] ps_mode_stat_visualiser  Visualiser context
 */
void isvce_msv_dump_visualisation(mode_stat_visualiser_t *ps_mode_stat_visualiser)
{
    yuv_buf_props_t *ps_frame = &ps_mode_stat_visualiser->s_frame_buf;
    FILE *ps_file = ps_mode_stat_visualiser->ps_output_file;
    WORD32 i4_comp = 0;

    while(i4_comp < NUM_COMPONENTS)
    {
        /* Chroma planes are subsampled by two in both dimensions */
        UWORD8 u1_shift = (((COMPONENT_TYPE) i4_comp) != Y);
        UWORD32 u4_plane_wd = ps_frame->u4_width >> u1_shift;
        UWORD32 u4_plane_ht = ps_frame->u4_height >> u1_shift;
        UWORD32 u4_plane_size = u4_plane_wd * u4_plane_ht;

        ASSERT(u4_plane_wd == ps_frame->as_component_bufs[i4_comp].i4_data_stride);

        fwrite(ps_frame->as_component_bufs[i4_comp].pv_data, sizeof(UWORD8), u4_plane_size,
               ps_file);

        i4_comp++;
    }
}
|
||||
#endif
|
||||
72
encoder/svc/isvce_mode_stat_visualiser.h
Normal file
72
encoder/svc/isvce_mode_stat_visualiser.h
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_mode_stat_visualiser.h
|
||||
*
|
||||
* @brief
|
||||
* Contains function declarations for functions defined in
|
||||
* isvce_mode_stat_visualiser.c
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_MODE_STAT_VISUALISER_H_
|
||||
#define _ISVCE_MODE_STAT_VISUALISER_H_
|
||||
#if ENABLE_MODE_STAT_VISUALISER
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
|
||||
typedef struct mode_stat_visualiser_t
|
||||
{
|
||||
FILE *ps_output_file;
|
||||
|
||||
yuv_buf_props_t s_frame_buf;
|
||||
|
||||
} mode_stat_visualiser_t;
|
||||
|
||||
extern UWORD32 isvce_get_msv_ctxt_size(UWORD32 u4_wd, UWORD32 u4_ht);
|
||||
|
||||
extern void isvce_msv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
|
||||
|
||||
extern void isvce_msv_ctxt_delete(mode_stat_visualiser_t *ps_mode_stat_visualiser);
|
||||
|
||||
extern void isvce_msv_get_input_frame(mode_stat_visualiser_t *ps_mode_stat_visualiser,
|
||||
isvce_inp_buf_t *ps_inp_buf);
|
||||
|
||||
extern void isvce_msv_dump_visualisation(mode_stat_visualiser_t *ps_mode_stat_visualiser);
|
||||
|
||||
extern void isvce_msv_set_mode(mode_stat_visualiser_t *ps_mode_stat_visualiser,
|
||||
isvce_mb_info_t *ps_mb_info, coordinates_t *ps_mb_pos);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
124
encoder/svc/isvce_nalu_stat_aggregator.c
Normal file
124
encoder/svc/isvce_nalu_stat_aggregator.c
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_nalu_stat_aggregator.c
|
||||
*
|
||||
* @brief
|
||||
* Contains objects used for aggregating nalu statistics
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "iv2.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "isvce_nalu_stat_aggregator.h"
|
||||
|
||||
/**
 * Resets the NALU count of every spatial layer's descriptor at the start of
 * an access unit.
 *
 * @param[in,out] ps_nalu_descriptor     Array with one descriptor per layer
 * @param[in]     u1_num_spatial_layers  Number of descriptors in the array
 */
void isvce_nalu_info_au_init(nalu_descriptors_t *ps_nalu_descriptor, UWORD8 u1_num_spatial_layers)
{
    nalu_descriptors_t *ps_layer = ps_nalu_descriptor;
    nalu_descriptors_t *ps_end = ps_nalu_descriptor + u1_num_spatial_layers;

    while(ps_layer != ps_end)
    {
        ps_layer->u1_num_nalus = 0;
        ps_layer++;
    }
}
|
||||
|
||||
void isvce_nalu_info_csv_translator(nalu_descriptors_t *ps_nalu_descriptor,
|
||||
isvce_nalu_info_buf_t *ps_csv_buf)
|
||||
{
|
||||
char ac_csv_string[MAX_BYTES_PER_NALU_INFO];
|
||||
WORD32 i;
|
||||
|
||||
WORD64 i8_num_bytes_available = ps_csv_buf->u4_buf_size - ps_csv_buf->u4_num_bytes;
|
||||
|
||||
for(i = 0; i < ps_nalu_descriptor->u1_num_nalus; i++)
|
||||
{
|
||||
if(ps_nalu_descriptor->as_nalu_info[i].b_is_vcl_nal)
|
||||
{
|
||||
snprintf(ac_csv_string, MAX_BYTES_PER_NALU_INFO, "%d,%u,%d,%d,%d,%d,%d\n",
|
||||
ps_nalu_descriptor->as_nalu_info[i].e_nalu_type,
|
||||
(UWORD32) (ps_nalu_descriptor->as_nalu_info[i].i8_num_bits / 8),
|
||||
ps_nalu_descriptor->as_nalu_info[i].u1_spatial_layer_id,
|
||||
ps_nalu_descriptor->as_nalu_info[i].u1_temporal_layer_id,
|
||||
ps_nalu_descriptor->as_nalu_info[i].b_is_idr, 1, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf(ac_csv_string, MAX_BYTES_PER_NALU_INFO, "%d,%u,%d,%d,%d,%d,%d\n",
|
||||
ps_nalu_descriptor->as_nalu_info[i].e_nalu_type,
|
||||
(UWORD32) (ps_nalu_descriptor->as_nalu_info[i].i8_num_bits / 8), -1, -1, -1,
|
||||
-1, -1);
|
||||
}
|
||||
|
||||
snprintf((char *) (ps_csv_buf->pu1_buf + ps_csv_buf->u4_num_bytes), i8_num_bytes_available,
|
||||
"%s", ac_csv_string);
|
||||
|
||||
ps_csv_buf->u4_num_bytes = (UWORD32) strlen((char *) ps_csv_buf->pu1_buf);
|
||||
i8_num_bytes_available = ps_csv_buf->u4_buf_size - ps_csv_buf->u4_num_bytes;
|
||||
|
||||
ASSERT(i8_num_bytes_available >= 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the first unused nalu_info_t slot of the descriptor, i.e. the
 * entry at index u1_num_nalus. The count itself is not modified.
 *
 * @param[in] ps_nalu_descriptor  NALU descriptor of one layer
 *
 * @returns Pointer to the next free entry of as_nalu_info
 */
nalu_info_t *isvce_get_next_nalu_info_buf(nalu_descriptors_t *ps_nalu_descriptor)
{
    /* as_nalu_info has MAX_NALU_PER_LAYER entries; a full descriptor has no */
    /* free slot to hand out */
    ASSERT(ps_nalu_descriptor->u1_num_nalus < MAX_NALU_PER_LAYER);

    return &ps_nalu_descriptor->as_nalu_info[ps_nalu_descriptor->u1_num_nalus];
}
|
||||
|
||||
void isvce_nalu_info_buf_init(nalu_info_t *ps_nalu_info, WORD64 i8_init_bits,
|
||||
NAL_UNIT_TYPE_T e_nalu_type, UWORD8 u1_spatial_layer_id,
|
||||
UWORD8 u1_temporal_layer_id, UWORD8 u1_num_slices, bool b_is_idr)
|
||||
{
|
||||
ps_nalu_info->e_nalu_type = e_nalu_type;
|
||||
ps_nalu_info->i8_num_bits = i8_init_bits;
|
||||
ps_nalu_info->b_is_idr = b_is_idr;
|
||||
|
||||
switch(e_nalu_type)
|
||||
{
|
||||
case NAL_SLICE_NON_IDR:
|
||||
case NAL_SLICE_IDR:
|
||||
case NAL_CODED_SLICE_EXTENSION:
|
||||
{
|
||||
ps_nalu_info->b_is_vcl_nal = true;
|
||||
ps_nalu_info->u1_spatial_layer_id = u1_spatial_layer_id;
|
||||
ps_nalu_info->u1_temporal_layer_id = u1_temporal_layer_id;
|
||||
ps_nalu_info->u1_num_slices = u1_num_slices;
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
ps_nalu_info->b_is_vcl_nal = false;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Marks the current nalu_info_t slot of the descriptor as used by
 * incrementing u1_num_nalus.
 *
 * @param[in,out] ps_nalu_descriptor  NALU descriptor of one layer
 */
void isvce_update_nalu_count(nalu_descriptors_t *ps_nalu_descriptor)
{
    /* The slot being committed must lie inside as_nalu_info */
    ASSERT(ps_nalu_descriptor->u1_num_nalus < MAX_NALU_PER_LAYER);

    ps_nalu_descriptor->u1_num_nalus++;
}
|
||||
99
encoder/svc/isvce_nalu_stat_aggregator.h
Normal file
99
encoder/svc/isvce_nalu_stat_aggregator.h
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_nalu_stat_aggregator.h
|
||||
*
|
||||
* @brief
|
||||
* Contains objects used for aggregating nalu statistics
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_NALU_STAT_AGGREGATOR_H_
|
||||
#define _ISVCE_NALU_STAT_AGGREGATOR_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvce.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvce_defs.h"
|
||||
|
||||
/* Macros */
|
||||
/* +1 for '\0' */
|
||||
#define MAX_BYTES_PER_NALU_INFO (45 + 1)
|
||||
|
||||
/* SPS + (MAX_NUM_SPATIAL_LAYERS - 1) * SUBSET_SPS +
|
||||
* MAX_NUM_SPATIAL_LAYERS * PPS + */
|
||||
/* 1 PREFIX_NALU + 1 SLICE_[NON|]IDR + (MAX_NUM_SPATIAL_LAYERS - 1) *
|
||||
* CODED_SLICE_EXTENSION */
|
||||
#define MAX_NALU_PER_LAYER 10
|
||||
|
||||
/* Structs */
|
||||
/* Per-NALU statistics record, filled by isvce_nalu_info_buf_init() and */
/* translated into one CSV row by isvce_nalu_info_csv_translator() */
typedef struct nalu_info_t
|
||||
{
|
||||
/* NAL unit type of this NALU */
NAL_UNIT_TYPE_T e_nalu_type;
|
||||
|
||||
/* Size of the NALU in bits; the CSV translator reports it as bits / 8 */
WORD64 i8_num_bits;
|
||||
|
||||
/* True for NAL_SLICE_NON_IDR, NAL_SLICE_IDR and NAL_CODED_SLICE_EXTENSION */
bool b_is_vcl_nal;
|
||||
|
||||
/* IDR flag as passed to isvce_nalu_info_buf_init() */
bool b_is_idr;
|
||||
|
||||
/* Spatial layer id; only written when b_is_vcl_nal is true */
UWORD8 u1_spatial_layer_id;
|
||||
|
||||
/* Temporal layer id; only written when b_is_vcl_nal is true */
UWORD8 u1_temporal_layer_id;
|
||||
|
||||
/* Slice count; only written when b_is_vcl_nal is true */
UWORD8 u1_num_slices;
|
||||
} nalu_info_t;
|
||||
|
||||
typedef struct nalu_descriptors_t
|
||||
{
|
||||
nalu_info_t as_nalu_info[MAX_NALU_PER_LAYER];
|
||||
|
||||
UWORD8 u1_num_nalus;
|
||||
|
||||
} nalu_descriptors_t;
|
||||
|
||||
/* Function declarations */
|
||||
/**
 * Returns the size in bytes of a buffer large enough to hold the CSV records
 * of every NALU of an access unit: MAX_NALU_PER_LAYER records per spatial
 * layer, MAX_BYTES_PER_NALU_INFO bytes per record.
 */
static FORCEINLINE UWORD32 isvce_get_nalu_info_buf_size(UWORD8 u1_num_spatial_layers)
{
    UWORD32 u4_max_num_nalus = MAX_NALU_PER_LAYER * u1_num_spatial_layers;

    return u4_max_num_nalus * MAX_BYTES_PER_NALU_INFO;
}
|
||||
|
||||
extern void isvce_nalu_info_au_init(nalu_descriptors_t *ps_nalu_descriptor,
|
||||
UWORD8 u1_num_spatial_layers);
|
||||
|
||||
extern void isvce_nalu_info_csv_translator(nalu_descriptors_t *ps_nalu_descriptor,
|
||||
isvce_nalu_info_buf_t *ps_csv_buf);
|
||||
|
||||
extern nalu_info_t *isvce_get_next_nalu_info_buf(nalu_descriptors_t *ps_nalu_descriptor);
|
||||
|
||||
extern void isvce_nalu_info_buf_init(nalu_info_t *ps_nalu_info, WORD64 i8_init_bytes,
|
||||
NAL_UNIT_TYPE_T e_nalu_type, UWORD8 u1_spatial_layer_id,
|
||||
UWORD8 u1_temporal_layer_id, UWORD8 u1_num_slices,
|
||||
bool b_is_idr);
|
||||
|
||||
extern void isvce_update_nalu_count(nalu_descriptors_t *ps_nalu_descriptor);
|
||||
|
||||
#endif
|
||||
156
encoder/svc/isvce_pred_structs.h
Normal file
156
encoder/svc/isvce_pred_structs.h
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_pred_structs.h
|
||||
*
|
||||
* @brief
|
||||
* Contains struct definition used for prediction
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_PRED_STRUCTS_H_
|
||||
#define _ISVCE_PRED_STRUCTS_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_defs.h"
|
||||
|
||||
/**
|
||||
* PU information
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
/**
|
||||
* Motion Vector
|
||||
*/
|
||||
mv_t s_mv;
|
||||
|
||||
/**
|
||||
* Ref index
|
||||
*/
|
||||
WORD8 i1_ref_idx;
|
||||
|
||||
} isvce_enc_pu_mv_t;
|
||||
|
||||
/*
|
||||
* Total Pu info for an MB
|
||||
*/
|
||||
typedef struct isvce_enc_pu_t
|
||||
{
|
||||
/* Array with ME info for all lists */
|
||||
isvce_enc_pu_mv_t as_me_info[NUM_PRED_DIRS];
|
||||
|
||||
UWORD8 au1_mvp_idx[NUM_PRED_DIRS];
|
||||
|
||||
/**
|
||||
* PU X position in terms of min PU (4x4) units
|
||||
*/
|
||||
UWORD8 u1_pos_x_in_4x4;
|
||||
|
||||
/**
|
||||
* PU Y position in terms of min PU (4x4) units
|
||||
*/
|
||||
UWORD8 u1_pos_y_in_4x4;
|
||||
|
||||
/**
|
||||
* PU width in pixels = (u1_wd_in_4x4_m1 + 1) << 2
|
||||
*/
|
||||
UWORD8 u1_wd_in_4x4_m1;
|
||||
|
||||
/**
|
||||
* PU height in pixels = (u1_ht_in_4x4_m1 + 1) << 2
|
||||
*/
|
||||
UWORD8 u1_ht_in_4x4_m1;
|
||||
|
||||
/**
|
||||
* PRED_L0, PRED_L1, PRED_BI
|
||||
*/
|
||||
UWORD8 u1_pred_mode;
|
||||
|
||||
} isvce_enc_pu_t;
|
||||
|
||||
typedef struct intra4x4_mode_data_t
|
||||
{
|
||||
UWORD8 u1_predicted_mode;
|
||||
|
||||
UWORD8 u1_mode;
|
||||
|
||||
} intra4x4_mode_data_t;
|
||||
|
||||
typedef intra4x4_mode_data_t intra8x8_mode_data_t;
|
||||
|
||||
typedef struct intra16x16_mode_data_t
|
||||
{
|
||||
UWORD8 u1_mode;
|
||||
|
||||
} intra16x16_mode_data_t;
|
||||
|
||||
typedef struct enc_intra_pu_t
|
||||
{
|
||||
intra4x4_mode_data_t as_i4x4_mode_data[MAX_TU_IN_MB];
|
||||
|
||||
intra8x8_mode_data_t as_i8x8_mode_data[MIN_TU_IN_MB];
|
||||
|
||||
intra16x16_mode_data_t s_i16x16_mode_data;
|
||||
|
||||
UWORD8 u1_chroma_intra_mode;
|
||||
|
||||
} enc_intra_pu_t;
|
||||
|
||||
typedef struct isvce_mb_info_t
|
||||
{
|
||||
isvce_enc_pu_t as_pu[ENC_MAX_PU_IN_MB];
|
||||
|
||||
enc_intra_pu_t s_intra_pu;
|
||||
|
||||
UWORD32 u4_cbp;
|
||||
|
||||
UWORD32 u4_csbp;
|
||||
|
||||
UWORD32 u4_res_csbp;
|
||||
|
||||
UWORD16 u2_mb_type;
|
||||
|
||||
WORD32 i4_mb_distortion;
|
||||
|
||||
UWORD8 u1_base_mode_flag;
|
||||
|
||||
UWORD8 u1_residual_prediction_flag;
|
||||
|
||||
UWORD8 u1_tx_size;
|
||||
|
||||
UWORD8 u1_mb_qp;
|
||||
|
||||
UWORD8 u1_is_intra;
|
||||
|
||||
} isvce_mb_info_t;
|
||||
|
||||
#endif
|
||||
2794
encoder/svc/isvce_process.c
Normal file
2794
encoder/svc/isvce_process.c
Normal file
File diff suppressed because it is too large
Load diff
285
encoder/svc/isvce_process.h
Normal file
285
encoder/svc/isvce_process.h
Normal file
|
|
@ -0,0 +1,285 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_process.h
|
||||
*
|
||||
* @brief
|
||||
* Contains functions for codec thread
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_PROCESS_H_
|
||||
#define _ISVCE_PROCESS_H_
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief This function generates sps, pps set on request
|
||||
*
|
||||
* @par Description
|
||||
* When the encoder is set in header generation mode, the following function
|
||||
* is called. This generates sps and pps headers and returns the control back
|
||||
* to caller.
|
||||
*
|
||||
* @param[in] ps_codec
|
||||
* pointer to codec context
*
* @param[in] ps_inp_buf
* pointer to the input buffer descriptor (isvce_inp_buf_t)
|
||||
*
|
||||
* @return success or failure error code
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_generate_sps_pps(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp_buf);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief initialize entropy context.
|
||||
*
|
||||
* @par Description:
|
||||
* Before invoking the call to perform entropy coding the entropy context
|
||||
* associated with the job needs to be initialized. This involves the start
|
||||
* mb address, end mb address, slice index and the pointer to location at
|
||||
* which the mb residue info and mb header info are packed.
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Pointer to the current process context
|
||||
*
|
||||
* @returns error status
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_init_entropy_ctxt(isvce_process_ctxt_t *ps_proc);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief entry point for entropy coding
|
||||
*
|
||||
* @par Description
|
||||
* This function calls lower level functions to perform entropy coding for a
|
||||
* group (n rows) of mb's. After encoding 1 row of mb's, the function takes
|
||||
* back the control, updates the ctxt and calls lower level functions again.
|
||||
* This process is repeated till all the rows or group of mb's (which ever is
|
||||
* minimum) are coded
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* process context
|
||||
*
|
||||
* @returns error status
|
||||
*
|
||||
* @remarks
|
||||
* NOTE : It is assumed that this routine is invoked at the start of a slice,
|
||||
* so the slice header is generated by default.
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_entropy(isvce_process_ctxt_t *ps_proc);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Packs header information of a mb in to a buffer
|
||||
*
|
||||
* @par Description:
|
||||
* After deciding the mode info of a macroblock, the syntax elements
|
||||
* associated with the mb are packed and stored. The entropy thread unpacks
|
||||
* this buffer and generates the end bit stream.
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Pointer to the current process context
|
||||
*
|
||||
* @returns error status
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_pack_header_data(isvce_process_ctxt_t *ps_proc);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief update process context after encoding an mb. This involves preserving
|
||||
* the current mb information for later use, initialize the proc ctxt elements to
|
||||
* encode next mb.
|
||||
*
|
||||
* @par Description:
|
||||
* This function performs house keeping tasks after encoding an mb.
|
||||
* After encoding an mb, various elements of the process context needs to be
|
||||
* updated to encode the next mb. For instance, the source, recon and reference
|
||||
* pointers, mb indices have to be adjusted to the next mb. The slice index of
|
||||
* the current mb needs to be updated. If mb qp modulation is enabled, then if
|
||||
* the qp changes the quant param structure needs to be updated. Also, while
|
||||
* encoding the next mb, the current mb info is used as part of mode prediction or
|
||||
* mv prediction. Hence the current mb info has to be preserved at top/top left/left
|
||||
* locations.
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Pointer to the current process context
|
||||
*
|
||||
* @returns WORD32 status (TODO confirm the exact meaning against the definition)
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_update_proc_ctxt(isvce_process_ctxt_t *ps_proc);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief initialize process context.
|
||||
*
|
||||
* @par Description:
|
||||
* Before dispatching the current job to process thread, the process context
|
||||
* associated with the job is initialized. Usually every job aims to encode one
|
||||
* row of mb's. Basing on the row indices provided by the job, the process
|
||||
* context's buffer ptrs, slice indices and other elements that are necessary
|
||||
* during core-coding are initialized.
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Pointer to the current process context
|
||||
*
|
||||
* @returns error status
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_init_proc_ctxt(isvce_process_ctxt_t *ps_proc);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief This function performs luma & chroma padding
|
||||
*
|
||||
* @par Description:
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context corresponding to the job
|
||||
*
|
||||
* @param[in] pu1_curr_pic_luma
|
||||
* Pointer to luma buffer
*
* @param[in] i4_luma_stride
* Stride of the luma buffer
|
||||
*
|
||||
* @param[in] pu1_curr_pic_chroma
|
||||
* Pointer to chroma buffer
*
* @param[in] i4_chroma_stride
* Stride of the chroma buffer
|
||||
*
|
||||
* @param[in] i4_mb_x
|
||||
* mb index x
|
||||
*
|
||||
* @param[in] i4_mb_y
|
||||
* mb index y
|
||||
*
|
||||
* @param[in] i4_pad_ht
|
||||
* number of rows to be padded
|
||||
*
|
||||
* @returns error status
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_pad_recon_buffer(isvce_process_ctxt_t *ps_proc, UWORD8 *pu1_curr_pic_luma,
|
||||
WORD32 i4_luma_stride, UWORD8 *pu1_curr_pic_chroma,
|
||||
WORD32 i4_chroma_stride, WORD32 i4_mb_x, WORD32 i4_mb_y,
|
||||
WORD32 i4_pad_ht);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief This function performs luma half pel planes generation
|
||||
*
|
||||
* @par Description:
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
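* Process context corresponding to the job
*
* @param[in] pu1_curr_pic_luma
* Pointer to luma buffer
*
* @param[in] i4_mb_x
* mb index x
*
* @param[in] i4_mb_y
* mb index y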
|
||||
*
|
||||
* @returns error status
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
IH264E_ERROR_T isvce_halfpel_generation(isvce_process_ctxt_t *ps_proc, UWORD8 *pu1_curr_pic_luma,
|
||||
WORD32 i4_mb_x, WORD32 i4_mb_y);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief This function performs luma & chroma core coding for a set of mb's.
|
||||
*
|
||||
* @par Description:
|
||||
* The mb to be coded is taken and is evaluated over a predefined set of modes
|
||||
* (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least
|
||||
*cost is selected and using intra/inter prediction filters, prediction is
|
||||
*carried out. The deviation between src and pred signal constitutes error
|
||||
*signal. This error signal is transformed (hierarchical transform if necessary)
|
||||
*and quantized. The quantized residue is packed in to entropy buffer for entropy
|
||||
*coding. This is repeated for all the mb's enlisted under the job.
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context corresponding to the job
|
||||
*
|
||||
* @returns error status
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_process(isvce_process_ctxt_t *ps_proc);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* entry point of a spawned encoder thread
|
||||
*
|
||||
* @par Description:
|
||||
* The encoder thread dequeues a proc/entropy job from the encoder queue and
|
||||
* calls necessary routines.
|
||||
*
|
||||
* @param[in] pv_proc
|
||||
* Process context corresponding to the thread
|
||||
*
|
||||
* @returns error status
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_process_thread(void *pv_proc);
|
||||
|
||||
#endif
|
||||
716
encoder/svc/isvce_rate_control.c
Normal file
716
encoder/svc/isvce_rate_control.c
Normal file
|
|
@ -0,0 +1,716 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_rate_control.c
|
||||
*
|
||||
* @brief
|
||||
* Contains api function definitions for h264 rate control
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_rc_init()
|
||||
* - isvce_rc_get_picture_details()
|
||||
* - isvce_rc_pre_enc()
* - isvce_update_rc_framerates()
|
||||
* - isvce_update_rc_mb_info()
|
||||
* - isvce_rc_get_buffer_status()
|
||||
* - isvce_rc_post_enc()
|
||||
* - isvce_update_rc_bits_info()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "irc_datatypes.h"
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "isvce.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
#include "isvc_inter_pred_filters.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
#include "ih264_padding.h"
|
||||
#include "ih264_intra_pred_filters.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "isvc_common_tables.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "isvce_globals.h"
|
||||
#include "irc_mem_req_and_acq.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "irc_rate_control_api.h"
|
||||
#include "ih264e_time_stamp.h"
|
||||
#include "ih264e_modify_frm_rate.h"
|
||||
#include "isvce_rate_control.h"
|
||||
#include "ih264e_error.h"
|
||||
#include "ih264e_time_stamp.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
#include "ime_defs.h"
|
||||
#include "ime_structs.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "ih264e_utils.h"
|
||||
#include "irc_trace_support.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Definitions */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function initializes rate control context and variables
|
||||
*
|
||||
* @par Description
|
||||
* This function initializes rate control type, source and target frame rate,
|
||||
* average and peak bitrate, intra-inter frame interval and initial
|
||||
* quantization parameter
|
||||
*
|
||||
* @param[in] pv_rc_api
|
||||
* Handle to rate control api
|
||||
*
|
||||
* @param[in] pv_frame_time
|
||||
* Handle to frame time context
|
||||
*
|
||||
* @param[in] pv_time_stamp
|
||||
* Handle to time stamp context
|
||||
*
|
||||
* @param[in] pv_pd_frm_rate
|
||||
* Handle to pull down frame time context
|
||||
*
|
||||
* @param[in] u4_max_frm_rate
|
||||
* Maximum frame rate
|
||||
*
|
||||
* @param[in] u4_src_frm_rate
|
||||
* Source frame rate
|
||||
*
|
||||
* @param[in] u4_tgt_frm_rate
|
||||
* Target frame rate
|
||||
*
|
||||
* @param[in] e_rate_control_type
|
||||
* Rate control type
|
||||
*
|
||||
* @param[in] u4_avg_bit_rate
|
||||
* Average bit rate
|
||||
*
|
||||
* @param[in] u4_peak_bit_rate
|
||||
* Peak bit rate
|
||||
*
|
||||
* @param[in] u4_max_delay
|
||||
* Maximum delay between frames
|
||||
*
|
||||
* @param[in] u4_intra_frame_interval
|
||||
* Intra frame interval
|
||||
*
|
||||
* @param[in] pu1_init_qp
|
||||
* Initial qp
|
||||
*
|
||||
* @param[in] i4_max_inter_frm_int
|
||||
* Maximum inter frame interval
|
||||
*
|
||||
* @param[in] pu1_min_max_qp
|
||||
* Array of min/max qp
|
||||
*
|
||||
* @param[in] u1_profile_level
|
||||
* Encoder profile level
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_rc_init(void *pv_rc_api, void *pv_frame_time, void *pv_time_stamp, void *pv_pd_frm_rate,
|
||||
UWORD32 u4_max_frm_rate, UWORD32 u4_src_frm_rate, UWORD32 u4_tgt_frm_rate,
|
||||
rc_type_e e_rate_control_type, UWORD32 u4_avg_bit_rate, UWORD32 u4_peak_bit_rate,
|
||||
UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, WORD32 i4_inter_frm_int,
|
||||
UWORD8 *pu1_init_qp, WORD32 i4_max_inter_frm_int, UWORD8 *pu1_min_max_qp,
|
||||
UWORD8 u1_profile_level)
|
||||
{
|
||||
// UWORD8 u1_is_mb_level_rc_on = 0;
|
||||
UWORD32 au4_peak_bit_rate[2] = {0, 0};
|
||||
UWORD32 u4_min_bit_rate = 0;
|
||||
WORD32 i4_is_gop_closed = 1;
|
||||
// WORD32 i4_use_est_intra_sad = 1;
|
||||
UWORD32 u4_src_ticks = 0;
|
||||
UWORD32 u4_tgt_ticks = 0;
|
||||
UWORD8 u1_level_idx = ih264e_get_lvl_idx(u1_profile_level);
|
||||
UWORD32 u4_max_cpb_size = 1200 * gas_isvc_lvl_tbl[u1_level_idx].u4_max_cpb_size;
|
||||
|
||||
/* Fill the params needed for the RC init */
|
||||
if(e_rate_control_type == CBR_NLDRC)
|
||||
{
|
||||
au4_peak_bit_rate[0] = u4_avg_bit_rate;
|
||||
au4_peak_bit_rate[1] = u4_avg_bit_rate;
|
||||
}
|
||||
else
|
||||
{
|
||||
au4_peak_bit_rate[0] = u4_peak_bit_rate;
|
||||
au4_peak_bit_rate[1] = u4_peak_bit_rate;
|
||||
}
|
||||
|
||||
/* Initialize frame time computation module*/
|
||||
ih264e_init_frame_time(pv_frame_time, u4_src_frm_rate, /* u4_src_frm_rate */
|
||||
u4_tgt_frm_rate); /* u4_tgt_frm_rate */
|
||||
|
||||
/* Initialize the pull_down frame rate */
|
||||
ih264e_init_pd_frm_rate(pv_pd_frm_rate, u4_src_frm_rate); /* u4_input_frm_rate */
|
||||
|
||||
/* Initialize time stamp structure */
|
||||
ih264e_init_time_stamp(pv_time_stamp, u4_max_frm_rate, /* u4_max_frm_rate */
|
||||
u4_src_frm_rate); /* u4_src_frm_rate */
|
||||
|
||||
u4_src_ticks = ih264e_frame_time_get_src_ticks(pv_frame_time);
|
||||
u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks(pv_frame_time);
|
||||
|
||||
/* Init max_inter_frame int */
|
||||
i4_max_inter_frm_int = (i4_inter_frm_int == 1) ? 2 : (i4_inter_frm_int + 2);
|
||||
|
||||
/* Initialize the rate control */
|
||||
irc_initialise_rate_control(
|
||||
pv_rc_api, /* RC handle */
|
||||
e_rate_control_type, /* RC algo type */
|
||||
0, /* MB activity on/off */
|
||||
u4_avg_bit_rate, /* Avg Bitrate */
|
||||
au4_peak_bit_rate, /* Peak bitrate array[2]:[I][P] */
|
||||
u4_min_bit_rate, /* Min Bitrate */
|
||||
u4_src_frm_rate, /* Src frame_rate */
|
||||
u4_max_delay, /* Max buffer delay */
|
||||
u4_intra_frame_interval, /* Intra frm_interval */
|
||||
i4_inter_frm_int, /* Inter frame interval */
|
||||
pu1_init_qp, /* Init QP array[3]:[I][P][B] */
|
||||
u4_max_cpb_size, /* Max VBV/CPB Buffer Size */
|
||||
i4_max_inter_frm_int, /* Max inter frm_interval */
|
||||
i4_is_gop_closed, /* Open/Closed GOP */
|
||||
pu1_min_max_qp, /* Min-max QP
|
||||
array[6]:[Imax][Imin][Pmax][Pmin][Bmax][Bmin] */
|
||||
0, /* How to calc the I-frame estimated_sad */
|
||||
u4_src_ticks, /* Src_ticks = LCM(src_frm_rate,tgt_frm_rate)/src_frm_rate
|
||||
*/
|
||||
u4_tgt_ticks); /* Tgt_ticks = LCM(src_frm_rate,tgt_frm_rate)/tgt_frm_rate
|
||||
*/
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to get picture details
|
||||
*
|
||||
* @par Description
|
||||
* This function returns the Picture type(I/P/B)
|
||||
*
|
||||
* @param[in] pv_rc_api
|
||||
* Handle to Rate control api
|
||||
*
|
||||
* @returns
|
||||
* Picture type
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
picture_type_e isvce_rc_get_picture_details(void *pv_rc_api, WORD32 *pi4_pic_id,
|
||||
WORD32 *pi4_pic_disp_order_no)
|
||||
{
|
||||
picture_type_e e_rc_pic_type = P_PIC;
|
||||
|
||||
irc_get_picture_details(pv_rc_api, pi4_pic_id, pi4_pic_disp_order_no, &e_rc_pic_type);
|
||||
|
||||
return (e_rc_pic_type);
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to get rate control output before encoding
|
||||
*
|
||||
* @par Description
|
||||
* This function is called before queing the current frame. It decides if we
|
||||
*should skip the current iput buffer due to frame rate mismatch. It also updates
|
||||
*RC about the acehivble frame rate
|
||||
*
|
||||
* @param[in] ps_rate_control_api
|
||||
* Handle to rate control api
|
||||
*
|
||||
* @param[in] ps_pd_frm_rate
|
||||
* Handle to pull down frm rate context
|
||||
*
|
||||
* @param[in] ps_time_stamp
|
||||
* Handle to time stamp context
|
||||
*
|
||||
* @param[in] ps_frame_time
|
||||
* Handle to frame time context
|
||||
*
|
||||
* @param[in] i4_delta_time_stamp
|
||||
* Time stamp difference between frames
|
||||
*
|
||||
* @param[in] i4_total_mb_in_frame
|
||||
* Total Macro Blocks in frame
|
||||
*
|
||||
* @param[in/out] pe_vop_coding_type
|
||||
* Picture coding type(I/P/B)
|
||||
*
|
||||
* @param[in/out] pu1_frame_qp
|
||||
* QP for current frame
|
||||
*
|
||||
* @returns
|
||||
* Skip or queue the current frame
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_update_rc_framerates(void *ps_rate_control_api, void *ps_pd_frm_rate,
|
||||
void *ps_time_stamp, void *ps_frame_time)
|
||||
{
|
||||
WORD8 i4_skip_src = 0;
|
||||
UWORD32 u4_src_not_skipped_for_dts = 0;
|
||||
|
||||
/* Update the time stamp for the current frame */
|
||||
ih264e_update_time_stamp(ps_time_stamp);
|
||||
|
||||
/* Check if a src not needs to be skipped */
|
||||
i4_skip_src = ih264e_should_src_be_skipped(ps_frame_time, 1, &u4_src_not_skipped_for_dts);
|
||||
|
||||
if(i4_skip_src)
|
||||
{
|
||||
/***********************************************************************
|
||||
*Based on difference in source and target frame rate frames are skipped
|
||||
***********************************************************************/
|
||||
/*update the missing frames frm_rate with 0 */
|
||||
ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
WORD32 i4_avg_frm_rate, i4_source_frame_rate;
|
||||
|
||||
i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time);
|
||||
|
||||
/* Update the frame rate of the frame present with the tgt_frm_rate */
|
||||
/* If the frm was not skipped due to delta_time_stamp, update the
|
||||
frame_rate with double the tgt_frame_rate value, so that it makes
|
||||
up for one of the frames skipped by the application */
|
||||
ih264e_update_pd_frm_rate(ps_pd_frm_rate, i4_source_frame_rate);
|
||||
|
||||
/* Based on the update get the average frame rate */
|
||||
i4_avg_frm_rate = ih264e_get_pd_avg_frm_rate(ps_pd_frm_rate);
|
||||
|
||||
/* Call the RC library function to change the frame_rate to the
|
||||
actually achieved frm_rate */
|
||||
irc_change_frm_rate_for_bit_alloc(ps_rate_control_api, i4_avg_frm_rate);
|
||||
}
|
||||
|
||||
return (i4_skip_src);
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to update mb info for rate control context
|
||||
*
|
||||
* @par Description
|
||||
* After encoding a mb, information such as mb type, qp used, mb distortion
|
||||
* resulted in encoding the block and so on needs to be preserved for modeling
|
||||
* RC. This is preserved via this function call.
|
||||
*
|
||||
* @param[in] ps_frame_info
|
||||
* Handle Frame info context
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc)
|
||||
{
|
||||
/* proc ctxt */
|
||||
isvce_process_ctxt_t *ps_proc = pv_proc;
|
||||
|
||||
/* is intra or inter */
|
||||
WORD32 mb_type = !ps_proc->ps_mb_info->u1_is_intra;
|
||||
|
||||
/* distortion */
|
||||
ps_frame_info->tot_mb_sad[mb_type] += ps_proc->i4_mb_distortion;
|
||||
|
||||
/* qp */
|
||||
ps_frame_info->qp_sum[mb_type] += gau1_h264_to_mpeg2_qmap[ps_proc->u1_mb_qp];
|
||||
|
||||
/* mb cnt */
|
||||
ps_frame_info->num_mbs[mb_type]++;
|
||||
|
||||
/* cost */
|
||||
if(ps_proc->ps_mb_info->u1_is_intra)
|
||||
{
|
||||
ps_frame_info->intra_mb_cost_sum += ps_proc->i4_mb_cost;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to get rate control buffer status
|
||||
*
|
||||
* @par Description
|
||||
* This function is used to get buffer status(underflow/overflow) by rate
|
||||
* control module
|
||||
*
|
||||
* @param[in] pv_rc_api
|
||||
* Handle to rate control api context
|
||||
*
|
||||
* @param[in] i4_total_frame_bits
|
||||
* Total frame bits
|
||||
*
|
||||
* @param[in] u1_pic_type
|
||||
* Picture type
|
||||
*
|
||||
* @param[in] pi4_num_bits_to_prevent_vbv_underflow
|
||||
* Number of bits to prevent underflow
|
||||
*
|
||||
* @param[out] pu1_is_enc_buf_overflow
|
||||
* Buffer overflow indication flag
|
||||
*
|
||||
* @param[out] pu1_is_enc_buf_underflow
|
||||
* Buffer underflow indication flag
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_rc_get_buffer_status(void *pv_rc_api, WORD32 i4_total_frame_bits,
|
||||
picture_type_e e_pic_type,
|
||||
WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
|
||||
UWORD8 *pu1_is_enc_buf_overflow, UWORD8 *pu1_is_enc_buf_underflow)
|
||||
{
|
||||
vbv_buf_status_e e_vbv_buf_status = VBV_NORMAL;
|
||||
|
||||
e_vbv_buf_status = irc_get_buffer_status(pv_rc_api, i4_total_frame_bits, e_pic_type,
|
||||
pi4_num_bits_to_prevent_vbv_underflow);
|
||||
|
||||
if(e_vbv_buf_status == VBV_OVERFLOW)
|
||||
{
|
||||
*pu1_is_enc_buf_underflow = 1;
|
||||
*pu1_is_enc_buf_overflow = 0;
|
||||
}
|
||||
else if(e_vbv_buf_status == VBV_UNDERFLOW)
|
||||
{
|
||||
*pu1_is_enc_buf_underflow = 0;
|
||||
*pu1_is_enc_buf_overflow = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
*pu1_is_enc_buf_underflow = 0;
|
||||
*pu1_is_enc_buf_overflow = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to update rate control module after encoding
|
||||
*
|
||||
* @par Description
|
||||
* This function is used to update the rate control module after the current
|
||||
* frame encoding is done with details such as bits consumed, SAD for I/P/B,
|
||||
* intra cost ,mb type and other
|
||||
*
|
||||
* @param[in] ps_rate_control_api
|
||||
* Handle to rate control api context
|
||||
*
|
||||
* @param[in] ps_frame_info
|
||||
* Handle to frame info context
|
||||
*
|
||||
* @param[in] ps_pd_frm_rate
|
||||
* Handle to pull down frame rate context
|
||||
*
|
||||
* @param[in] ps_time_stamp
|
||||
* Handle to time stamp context
|
||||
*
|
||||
* @param[in] ps_frame_time
|
||||
* Handle to frame time context
|
||||
*
|
||||
* @param[in] i4_total_mb_in_frame
|
||||
* Total mb in frame
|
||||
*
|
||||
* @param[in] pe_vop_coding_type
|
||||
* Picture coding type
|
||||
*
|
||||
* @param[in] i4_is_first_frame
|
||||
* Is first frame
|
||||
*
|
||||
* @param[out] pi4_is_post_encode_skip
|
||||
* Post encoding skip flag
|
||||
*
|
||||
* @param[in] u1_frame_qp
|
||||
* Frame qp
|
||||
*
|
||||
* @param[in] pi4_num_intra_in_prev_frame
|
||||
* Number of intra mbs in previous frame
|
||||
*
|
||||
* @param[out] pi4_avg_activity
|
||||
* Average activity
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
|
||||
void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
|
||||
WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
|
||||
WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
|
||||
UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
|
||||
WORD32 *pi4_avg_activity
|
||||
#if ENABLE_RE_ENC_AS_SKIP
|
||||
,
|
||||
UWORD8 *u1_is_post_enc_skip
|
||||
#endif
|
||||
)
|
||||
{
|
||||
/* Variables for the update_frm_level_info */
|
||||
WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE];
|
||||
WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0};
|
||||
WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0};
|
||||
WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0};
|
||||
WORD32 i4_total_frame_bits = 0;
|
||||
WORD32 i4_total_hdr_bits = 0;
|
||||
WORD32 i4_total_texturebits;
|
||||
WORD32 i4_avg_mb_activity = 0;
|
||||
WORD32 i4_intra_frm_cost = 0;
|
||||
UWORD8 u1_is_scd = 0;
|
||||
WORD32 i4_cbr_bits_to_stuff = 0;
|
||||
UWORD32 u4_num_intra_in_prev_frame = *pi4_num_intra_in_prev_frame;
|
||||
|
||||
UNUSED(ps_pd_frm_rate);
|
||||
UNUSED(ps_time_stamp);
|
||||
UNUSED(ps_frame_time);
|
||||
UNUSED(u1_frame_qp);
|
||||
UNUSED(i4_is_first_frame);
|
||||
/* Accumulate RC stats */
|
||||
ai4_tot_mb_in_type[MB_TYPE_INTRA] = irc_fi_get_total_mb(ps_frame_info, MB_TYPE_INTRA);
|
||||
ai4_tot_mb_in_type[MB_TYPE_INTER] = irc_fi_get_total_mb(ps_frame_info, MB_TYPE_INTER);
|
||||
ai4_tot_mb_type_qp[MB_TYPE_INTRA] = irc_fi_get_total_mb_qp(ps_frame_info, MB_TYPE_INTRA);
|
||||
ai4_tot_mb_type_qp[MB_TYPE_INTER] = irc_fi_get_total_mb_qp(ps_frame_info, MB_TYPE_INTER);
|
||||
ai4_mb_type_sad[MB_TYPE_INTRA] = irc_fi_get_total_mb_sad(ps_frame_info, MB_TYPE_INTRA);
|
||||
ai4_mb_type_sad[MB_TYPE_INTER] = irc_fi_get_total_mb_sad(ps_frame_info, MB_TYPE_INTER);
|
||||
i4_intra_frm_cost = irc_fi_get_total_intra_mb_cost(ps_frame_info);
|
||||
i4_avg_mb_activity = irc_fi_get_avg_activity(ps_frame_info);
|
||||
i4_total_hdr_bits = irc_fi_get_total_header_bits(ps_frame_info);
|
||||
i4_total_texturebits = irc_fi_get_total_mb_texture_bits(ps_frame_info, MB_TYPE_INTRA);
|
||||
i4_total_texturebits += irc_fi_get_total_mb_texture_bits(ps_frame_info, MB_TYPE_INTER);
|
||||
i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits;
|
||||
|
||||
*pi4_avg_activity = i4_avg_mb_activity;
|
||||
|
||||
/* Texture bits are not accumulated. Hence subtracting hdr bits from total
|
||||
* bits */
|
||||
ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0;
|
||||
ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits;
|
||||
|
||||
/* Set post encode skip to zero */
|
||||
pi4_is_post_encode_skip[0] = 0;
|
||||
|
||||
/* For NLDRC, get the buffer status for stuffing or skipping */
|
||||
if(irc_get_rc_type(ps_rate_control_api) == CBR_NLDRC)
|
||||
{
|
||||
WORD32 i4_get_num_bit_to_prevent_vbv_overflow;
|
||||
UWORD8 u1_enc_buf_overflow, u1_enc_buf_underflow;
|
||||
|
||||
/* Getting the buffer status */
|
||||
isvce_rc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits, pe_vop_coding_type[0],
|
||||
&i4_get_num_bit_to_prevent_vbv_overflow, &u1_enc_buf_overflow,
|
||||
&u1_enc_buf_underflow);
|
||||
|
||||
/* We skip the frame if decoder buffer is underflowing. But we never skip
|
||||
* first I frame */
|
||||
#if !DISABLE_POST_ENC_SKIP
|
||||
if((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 1))
|
||||
// if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0))
|
||||
{
|
||||
irc_post_encode_frame_skip(ps_rate_control_api, (picture_type_e) pe_vop_coding_type[0]);
|
||||
// i4_total_frame_bits = imp4_write_skip_frame_header(ps_enc);
|
||||
i4_total_frame_bits = 0;
|
||||
|
||||
*pi4_is_post_encode_skip = 1;
|
||||
|
||||
/* Adjust the GOP if in case we skipped an I-frame */
|
||||
if(*pe_vop_coding_type == I_PIC) irc_force_I_frame(ps_rate_control_api);
|
||||
|
||||
/* Since this frame is skipped by writing 7 bytes header, we say this is a
|
||||
* P frame */
|
||||
// *pe_vop_coding_type = P;
|
||||
|
||||
/* Getting the buffer status again,to check if it underflows */
|
||||
irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
|
||||
(picture_type_e) pe_vop_coding_type[0],
|
||||
&i4_get_num_bit_to_prevent_vbv_overflow);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ENABLE_RE_ENC_AS_SKIP
|
||||
/* Check for VBV constraints - post encode skip */
|
||||
if(u1_enc_buf_overflow == 1 && (pe_vop_coding_type[0] != I_PIC))
|
||||
{
|
||||
*u1_is_post_enc_skip = 1;
|
||||
|
||||
ai4_tot_mb_in_type[MB_TYPE_INTER] += ai4_tot_mb_in_type[MB_TYPE_INTRA];
|
||||
ai4_tot_mb_in_type[MB_TYPE_INTRA] = 0;
|
||||
ai4_tot_mb_type_qp[MB_TYPE_INTER] += ai4_tot_mb_type_qp[MB_TYPE_INTRA];
|
||||
ai4_tot_mb_type_qp[MB_TYPE_INTRA] = 0;
|
||||
|
||||
ai4_mb_type_sad[MB_TYPE_INTER] += ai4_mb_type_sad[MB_TYPE_INTRA];
|
||||
ai4_mb_type_sad[MB_TYPE_INTRA] = 0;
|
||||
|
||||
i4_intra_frm_cost = 0;
|
||||
|
||||
i4_total_hdr_bits = 0;
|
||||
i4_total_texturebits = 0;
|
||||
i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits;
|
||||
|
||||
ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0;
|
||||
ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits;
|
||||
|
||||
/* Getting the buffer status again,to check if it underflows */
|
||||
irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
|
||||
(picture_type_e) pe_vop_coding_type[0],
|
||||
&i4_get_num_bit_to_prevent_vbv_overflow);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* In this case we stuff bytes as buffer is overflowing */
|
||||
if(u1_enc_buf_underflow == 1)
|
||||
{
|
||||
/* The stuffing function is directly pulled out from split controller
|
||||
workspace. encode_vop_data() function makes sure alignment data is
|
||||
dumped at the end of a frame. Split controller was identifying this
|
||||
alignment byte, overwriting it with the stuff data and then finally
|
||||
aligning the buffer. Here every thing is inside the DSP. So, ideally
|
||||
encode_vop_data needn't align, and we can start stuffing directly. But
|
||||
in that case, it'll break the logic for a normal frame. Hence for
|
||||
simplicity, not changing this part since it is ok to align and then
|
||||
overwrite since stuffing is not done for every frame */
|
||||
i4_cbr_bits_to_stuff = irc_get_bits_to_stuff(ps_rate_control_api, i4_total_frame_bits,
|
||||
pe_vop_coding_type[0]);
|
||||
|
||||
/* Just add extra 32 bits to make sure we don't stuff lesser */
|
||||
i4_cbr_bits_to_stuff += 32;
|
||||
|
||||
/* We can not stuff more than the outbuf size. So have a check here */
|
||||
/* Add stuffed bits to total bits */
|
||||
i4_total_frame_bits += i4_cbr_bits_to_stuff;
|
||||
}
|
||||
}
|
||||
|
||||
/* If number of intra MBs are more than 2/3rd of total MBs, assume it as a
|
||||
* scene change */
|
||||
if((ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((2 * i4_total_mb_in_frame) / 3)) &&
|
||||
(*pe_vop_coding_type == P_PIC) &&
|
||||
(ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((11 * (WORD32) u4_num_intra_in_prev_frame) / 10)))
|
||||
{
|
||||
u1_is_scd = 1;
|
||||
}
|
||||
|
||||
/* Update num intra mbs of this frame */
|
||||
if(pi4_is_post_encode_skip[0] == 0)
|
||||
{
|
||||
*pi4_num_intra_in_prev_frame = ai4_tot_mb_in_type[MB_TYPE_INTRA];
|
||||
}
|
||||
|
||||
/* Reset intra count to zero, if u encounter an I frame */
|
||||
if(*pe_vop_coding_type == I_PIC)
|
||||
{
|
||||
*pi4_num_intra_in_prev_frame = 0;
|
||||
}
|
||||
|
||||
/* Do an update of rate control after post encode */
|
||||
irc_update_frame_level_info(ps_rate_control_api, /* RC state */
|
||||
pe_vop_coding_type[0], /* PIC type */
|
||||
ai4_mb_type_sad, /* SAD for [Intra/Inter] */
|
||||
i4_total_frame_bits, /* Total frame bits */
|
||||
i4_total_hdr_bits, /* header bits for */
|
||||
ai4_mb_type_tex_bits, /* for MB[Intra/Inter] */
|
||||
ai4_tot_mb_type_qp, /* for MB[Intra/Inter] */
|
||||
ai4_tot_mb_in_type, /* for MB[Intra/Inter] */
|
||||
i4_avg_mb_activity, /* Average mb activity in frame */
|
||||
u1_is_scd, /* Is a scene change detected */
|
||||
0, /* Pre encode skip */
|
||||
(WORD32) i4_intra_frm_cost, /* Intra cost for frame */
|
||||
0); /* Not done outside */
|
||||
|
||||
return (i4_cbr_bits_to_stuff >> 3);
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to update bits consumed info to rate control context
|
||||
*
|
||||
* @par Description
|
||||
* Function to update bits consume info to rate control context
|
||||
*
|
||||
* @param[in] ps_frame_info
|
||||
* Frame info context
|
||||
*
|
||||
* @param[in] ps_entropy
|
||||
* Entropy context
|
||||
*
|
||||
* @returns
|
||||
* total bits consumed by the frame
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy)
|
||||
{
|
||||
isvce_entropy_ctxt_t *ps_entropy = pv_entropy;
|
||||
|
||||
ps_frame_info->mb_header_bits[MB_TYPE_INTRA] += ps_entropy->u4_header_bits[MB_TYPE_INTRA];
|
||||
|
||||
ps_frame_info->mb_texture_bits[MB_TYPE_INTRA] += ps_entropy->u4_residue_bits[MB_TYPE_INTRA];
|
||||
|
||||
ps_frame_info->mb_header_bits[MB_TYPE_INTER] += ps_entropy->u4_header_bits[MB_TYPE_INTER];
|
||||
|
||||
ps_frame_info->mb_texture_bits[MB_TYPE_INTER] += ps_entropy->u4_residue_bits[MB_TYPE_INTER];
|
||||
|
||||
return;
|
||||
}
|
||||
330
encoder/svc/isvce_rate_control.h
Normal file
330
encoder/svc/isvce_rate_control.h
Normal file
|
|
@ -0,0 +1,330 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_rate_control.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains function declarations of api functions for h264 rate
|
||||
* control
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_RATE_CONTROL_H_
|
||||
#define _ISVCE_RATE_CONTROL_H_
|
||||
|
||||
#if ENABLE_RE_ENC_AS_SKIP
|
||||
#include "isvce_structs.h"
|
||||
#endif
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Declarations */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* This function initializes rate control context and variables
|
||||
*
|
||||
* @par Description
|
||||
* This function initializes rate control type, source and target frame rate,
|
||||
* average and peak bitrate, intra-inter frame interval and initial
|
||||
* quantization parameter
|
||||
*
|
||||
* @param[in] pv_rc_api
|
||||
* Handle to rate control api
|
||||
*
|
||||
* @param[in] pv_frame_time
|
||||
* Handle to frame time context
|
||||
*
|
||||
* @param[in] pv_time_stamp
|
||||
* Handle to time stamp context
|
||||
*
|
||||
* @param[in] pv_pd_frm_rate
|
||||
* Handle to pull down frame time context
|
||||
*
|
||||
* @param[in] u4_max_frm_rate
|
||||
* Maximum frame rate
|
||||
*
|
||||
* @param[in] u4_src_frm_rate
|
||||
* Source frame rate
|
||||
*
|
||||
* @param[in] u4_tgt_frm_rate
|
||||
* Target frame rate
|
||||
*
|
||||
* @param[in] e_rate_control_type
|
||||
* Rate control type
|
||||
*
|
||||
* @param[in] u4_avg_bit_rate
|
||||
* Average bit rate
|
||||
*
|
||||
* @param[in] u4_peak_bit_rate
|
||||
* Peak bit rate
|
||||
*
|
||||
* @param[in] u4_max_delay
|
||||
* Maximum delay between frames
|
||||
*
|
||||
* @param[in] u4_intra_frame_interval
|
||||
* Intra frame interval
|
||||
*
|
||||
* @param[in] i4_inter_frm_int
|
||||
* Inter frame interval
|
||||
*
|
||||
* @param[in] pu1_init_qp
|
||||
* Initial qp
|
||||
*
|
||||
* @param[in] i4_max_inter_frm_int
|
||||
* Maximum inter frame interval
|
||||
*
|
||||
* @param[in] pu1_min_max_qp
|
||||
* Array of min/max qp
|
||||
*
|
||||
* @param[in] u1_profile_level
|
||||
* Encoder profile level
|
||||
*
|
||||
* @returns none
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_rc_init(void *pv_rc_api, void *pv_frame_time, void *pv_time_stamp, void *pv_pd_frm_rate,
|
||||
UWORD32 u4_max_frm_rate, UWORD32 u4_src_frm_rate, UWORD32 u4_tgt_frm_rate,
|
||||
rc_type_e e_rate_control_type, UWORD32 u4_avg_bit_rate, UWORD32 u4_peak_bit_rate,
|
||||
UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, WORD32 i4_inter_frm_int,
|
||||
UWORD8 *pu1_init_qp, WORD32 i4_max_inter_frm_int, UWORD8 *pu1_min_max_qp,
|
||||
UWORD8 u1_profile_level);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to get picture details
|
||||
*
|
||||
* @par Description
|
||||
* This function returns the Picture type(I/P/B)
|
||||
*
|
||||
* @param[in] pv_rc_api
|
||||
* Handle to Rate control api
|
||||
*
|
||||
* @returns
|
||||
* Picture type
|
||||
*
|
||||
* @remarks none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
picture_type_e isvce_rc_get_picture_details(void *pv_rc_api, WORD32 *pi4_pic_id,
|
||||
WORD32 *pi4_pic_disp_order_no);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to set frame rate inside RC.
|
||||
*
|
||||
* @par Description
|
||||
* This function is called before encoding the current frame and gets the qp
|
||||
* for the current frame from rate control module
|
||||
*
|
||||
* @param[in] ps_rate_control_api
|
||||
* Handle to rate control api
|
||||
*
|
||||
* @param[in] ps_pd_frm_rate
|
||||
* Handle to pull down frm rate context
|
||||
*
|
||||
* @param[in] ps_time_stamp
|
||||
* Handle to time stamp context
|
||||
*
|
||||
* @param[in] ps_frame_time
|
||||
* Handle to frame time context
|
||||
*
|
||||
* @returns
|
||||
* Skip or encode the current frame
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_update_rc_framerates(void *ps_rate_control_api, void *ps_pd_frm_rate,
|
||||
void *ps_time_stamp, void *ps_frame_time);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to update mb info for rate control context
|
||||
*
|
||||
* @par Description
|
||||
* After encoding a mb, information such as mb type, qp used, mb distortion
|
||||
* resulted in encoding the block and so on needs to be preserved for modelling
|
||||
* RC. This is preserved via this function call.
|
||||
*
|
||||
* @param[in] ps_frame_info
|
||||
* Handle Frame info context
|
||||
*
|
||||
* @param[in] ps_proc
|
||||
* Process context
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to get rate control buffer status
|
||||
*
|
||||
* @par Description
|
||||
* This function is used to get buffer status(underflow/overflow) by rate
|
||||
* control module
|
||||
*
|
||||
* @param[in] pv_rc_api
|
||||
* Handle to rate control api context
|
||||
*
|
||||
* @param[in] i4_total_frame_bits
|
||||
* Total frame bits
|
||||
*
|
||||
* @param[in] e_pic_type
|
||||
* Picture type
|
||||
*
|
||||
* @param[out] pi4_num_bits_to_prevent_vbv_underflow
|
||||
* Number of bits to prevent underflow
|
||||
*
|
||||
* @param[out] pu1_is_enc_buf_overflow
|
||||
* Buffer overflow indication flag
|
||||
*
|
||||
* @param[out] pu1_is_enc_buf_underflow
|
||||
* Buffer underflow indication flag
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_rc_get_buffer_status(void *pv_rc_api, WORD32 i4_total_frame_bits,
|
||||
picture_type_e e_pic_type,
|
||||
WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
|
||||
UWORD8 *pu1_is_enc_buf_overflow, UWORD8 *pu1_is_enc_buf_underflow);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to update rate control module after encoding
|
||||
*
|
||||
* @par Description
|
||||
* This function is used to update the rate control module after the current
|
||||
* frame encoding is done with details such as bits consumed, SAD for I/P/B,
|
||||
* intra cost ,mb type and other
|
||||
*
|
||||
* @param[in] ps_rate_control_api
|
||||
* Handle to rate control api context
|
||||
*
|
||||
* @param[in] ps_frame_info
|
||||
* Handle to frame info context
|
||||
*
|
||||
* @param[in] ps_pd_frm_rate
|
||||
* Handle to pull down frame rate context
|
||||
*
|
||||
* @param[in] ps_time_stamp
|
||||
* Handle to time stamp context
|
||||
*
|
||||
* @param[in] ps_frame_time
|
||||
* Handle to frame time context
|
||||
*
|
||||
* @param[in] i4_total_mb_in_frame
|
||||
* Total mb in frame
|
||||
*
|
||||
* @param[in] pe_vop_coding_type
|
||||
* Picture coding type
|
||||
*
|
||||
* @param[in] i4_is_first_frame
|
||||
* Is first frame
|
||||
*
|
||||
* @param[out] pi4_is_post_encode_skip
|
||||
* Post encoding skip flag
|
||||
*
|
||||
* @param[in] u1_frame_qp
|
||||
* Frame qp
|
||||
*
|
||||
* @param[in] pi4_num_intra_in_prev_frame
|
||||
* Number of intra mbs in previous frame
|
||||
*
|
||||
* @param[out] pi4_avg_activity
|
||||
* Average activity
|
||||
*
|
||||
* @returns Number of bytes to be stuffed (CBR bits to stuff >> 3)
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#if ENABLE_RE_ENC_AS_SKIP
|
||||
WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
|
||||
void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
|
||||
WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
|
||||
WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
|
||||
UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
|
||||
WORD32 *pi4_avg_activity, UWORD8 *u1_is_post_enc_skip);
|
||||
#else
|
||||
WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
|
||||
void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
|
||||
WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
|
||||
WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
|
||||
UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
|
||||
WORD32 *pi4_avg_activity);
|
||||
|
||||
#endif
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief Function to update bits consumed info to rate control context
|
||||
*
|
||||
* @par Description
|
||||
* Function to update bits consume info to rate control context
|
||||
*
|
||||
* @param[in] ps_frame_info
|
||||
* Frame info context
|
||||
*
|
||||
* @param[in] pv_entropy
|
||||
* Entropy context
|
||||
*
|
||||
* @returns
|
||||
* none
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
void isvce_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy);
|
||||
|
||||
#endif
|
||||
325
encoder/svc/isvce_rc_mem_interface.c
Normal file
325
encoder/svc/isvce_rc_mem_interface.c
Normal file
|
|
@ -0,0 +1,325 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file
|
||||
* isvce_rc_mem_interface.c
|
||||
*
|
||||
* @brief
|
||||
* This file contains api function definitions for rate control memtabs
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* List of Functions
|
||||
* - fill_memtab()
|
||||
* - use_or_fill_base()
|
||||
* - isvce_map_rc_mem_recs_to_itt_api()
|
||||
* - isvce_map_itt_mem_rec_to_rc_mem_rec()
|
||||
* - isvce_get_rate_control_mem_tab()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/*****************************************************************************/
|
||||
/* File Includes */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* System include files */
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <stdarg.h>
|
||||
#include <math.h>
|
||||
|
||||
/* User Include Files */
|
||||
#include "ih264e_config.h"
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_size_defs.h"
|
||||
#include "iv2.h"
|
||||
#include "ive2.h"
|
||||
#include "ime_distortion_metrics.h"
|
||||
#include "ime_defs.h"
|
||||
#include "ime_structs.h"
|
||||
#include "isvce.h"
|
||||
#include "ithread.h"
|
||||
#include "isvc_defs.h"
|
||||
#include "ih264_debug.h"
|
||||
#include "isvc_macros.h"
|
||||
#include "ih264_platform_macros.h"
|
||||
#include "ih264_error.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvc_trans_quant_itrans_iquant.h"
|
||||
#include "isvc_inter_pred_filters.h"
|
||||
#include "isvc_mem_fns.h"
|
||||
#include "ih264_padding.h"
|
||||
#include "ih264_intra_pred_filters.h"
|
||||
#include "ih264_deblk_edge_filters.h"
|
||||
#include "isvc_common_tables.h"
|
||||
#include "ih264_list.h"
|
||||
#include "isvc_cabac_tables.h"
|
||||
#include "ih264e_error.h"
|
||||
#include "isvce_defs.h"
|
||||
#include "ih264e_bitstream.h"
|
||||
#include "irc_cntrl_param.h"
|
||||
#include "irc_frame_info_collector.h"
|
||||
#include "isvce_rate_control.h"
|
||||
#include "isvce_cabac_structs.h"
|
||||
#include "isvce_structs.h"
|
||||
#include "ih264e_master.h"
|
||||
#include "ih264_buf_mgr.h"
|
||||
#include "ih264_dpb_mgr.h"
|
||||
#include "isvce_utils.h"
|
||||
#include "ih264e_platform_macros.h"
|
||||
#include "ih264_cavlc_tables.h"
|
||||
#include "ih264e_statistics.h"
|
||||
#include "ih264e_trace.h"
|
||||
#include "ih264e_fmt_conv.h"
|
||||
#include "isvce_cavlc.h"
|
||||
#include "ih264e_rc_mem_interface.h"
|
||||
#include "isvce_rc_mem_interface.h"
|
||||
#include "ih264e_time_stamp.h"
|
||||
#include "irc_common.h"
|
||||
#include "irc_rd_model.h"
|
||||
#include "irc_est_sad.h"
|
||||
#include "irc_fixed_point_error_bits.h"
|
||||
#include "irc_vbr_storage_vbv.h"
|
||||
#include "irc_picture_type.h"
|
||||
#include "irc_bit_allocation.h"
|
||||
#include "irc_mb_model_based.h"
|
||||
#include "irc_cbr_buffer_control.h"
|
||||
#include "irc_vbr_str_prms.h"
|
||||
#include "irc_rate_control_api.h"
|
||||
#include "irc_rate_control_api_structs.h"
|
||||
#include "ih264e_modify_frm_rate.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function Definitions */
|
||||
/*****************************************************************************/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief This function maps rc mem records structure to encoder lib mem records
|
||||
* structure
|
||||
*
|
||||
* @par Description
|
||||
* This function maps rc mem records structure to encoder lib mem records
|
||||
* structure
|
||||
*
|
||||
* @param[in] ps_mem
|
||||
* pointer to encoder lib mem records
|
||||
*
|
||||
* @param[in] rc_memtab
|
||||
* pointer to rc mem records
|
||||
*
|
||||
* @param[in] num_mem_recs
|
||||
* number of memory records
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
static void isvce_map_rc_mem_recs_to_itt_api(iv_mem_rec_t *ps_mem, itt_memtab_t *rc_memtab,
|
||||
UWORD32 num_mem_recs)
|
||||
{
|
||||
UWORD32 j;
|
||||
UWORD32 Size, align;
|
||||
|
||||
for(j = 0; j < num_mem_recs; j++)
|
||||
{
|
||||
Size = rc_memtab->u4_size;
|
||||
align = rc_memtab->i4_alignment;
|
||||
|
||||
/* we always ask for external persistent cacheable memory */
|
||||
FILL_MEMTAB(ps_mem, j, Size, align, IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM);
|
||||
|
||||
rc_memtab++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief This function maps encoder lib mem records structure to RC memory
|
||||
* records structure
|
||||
*
|
||||
* @par Description
|
||||
* This function maps encoder lib mem records structure to RC memory
|
||||
* records structure
|
||||
*
|
||||
* @param[in] ps_mem
|
||||
* pointer to encoder lib mem records
|
||||
*
|
||||
* @param[in] rc_memtab
|
||||
* pointer to rc mem records
|
||||
*
|
||||
* @param[in] num_mem_recs
|
||||
* Number of memory records
|
||||
|
||||
* @returns none
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
static void isvce_map_itt_mem_rec_to_rc_mem_rec(iv_mem_rec_t *ps_mem, itt_memtab_t *rc_memtab,
|
||||
UWORD32 num_mem_recs)
|
||||
{
|
||||
UWORD32 i;
|
||||
|
||||
for(i = 0; i < num_mem_recs; i++)
|
||||
{
|
||||
rc_memtab->i4_alignment = ps_mem->u4_mem_alignment;
|
||||
rc_memtab->u4_size = ps_mem->u4_mem_size;
|
||||
rc_memtab->pv_base = ps_mem->pv_base;
|
||||
|
||||
/* only DDR memory is available */
|
||||
rc_memtab->e_mem_region = DDR;
|
||||
rc_memtab->e_usage = PERSISTENT;
|
||||
|
||||
rc_memtab++;
|
||||
ps_mem++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
*
|
||||
* @brief Get memtabs for rate control
|
||||
*
|
||||
* @par Description
|
||||
* This routine is used to Get/init memtabs for rate control
|
||||
*
|
||||
* @param[in] pv_rate_control
|
||||
* pointer to rate control context (handle)
|
||||
*
|
||||
* @param[in] ps_mem
|
||||
* pointer to encoder lib mem records
|
||||
*
|
||||
* @param[in] e_func_type
|
||||
* enum that dictates fill memory records or Init memory records
|
||||
*
|
||||
* @return total number of mem records
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
WORD32 isvce_get_rate_control_mem_tab(void *pv_rate_control, iv_mem_rec_t *ps_mem,
|
||||
ITT_FUNC_TYPE_E e_func_type)
|
||||
{
|
||||
itt_memtab_t as_itt_memtab[NUM_SVCE_RC_MEMTABS];
|
||||
WORD32 i4_num_memtab = 0, j = 0;
|
||||
void *refptr2[RC_MEM_CNT];
|
||||
void **refptr1[RC_MEM_CNT];
|
||||
isvce_rate_control_ctxt_t *ps_rate_control = pv_rate_control;
|
||||
|
||||
for(j = 0; j < RC_MEM_CNT; j++) refptr1[j] = &(refptr2[j]);
|
||||
|
||||
j = 0;
|
||||
|
||||
if(e_func_type == USE_BASE || e_func_type == FILL_BASE)
|
||||
{
|
||||
refptr1[RC_MEM_FRAME_TIME] = &ps_rate_control->pps_frame_time;
|
||||
refptr1[RC_MEM_TIME_STAMP] = &ps_rate_control->pps_time_stamp;
|
||||
refptr1[RC_MEM_FRAME_RATE] = &ps_rate_control->pps_pd_frm_rate;
|
||||
refptr1[RC_MEM_API_L0] = &ps_rate_control->apps_rate_control_api[0];
|
||||
refptr1[RC_MEM_API_L1] = &ps_rate_control->apps_rate_control_api[1];
|
||||
refptr1[RC_MEM_API_L2] = &ps_rate_control->apps_rate_control_api[2];
|
||||
}
|
||||
|
||||
/* Get the total number of memtabs used by Frame time Module */
|
||||
i4_num_memtab = ih264e_frame_time_get_init_free_memtab(
|
||||
(frame_time_t **) refptr1[RC_MEM_FRAME_TIME], NULL, GET_NUM_MEMTAB);
|
||||
/* Few extra steps during init */
|
||||
isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
/* Fill the memtabs used by Frame time Module */
|
||||
i4_num_memtab = ih264e_frame_time_get_init_free_memtab(
|
||||
(frame_time_t **) refptr1[RC_MEM_FRAME_TIME], as_itt_memtab + j, e_func_type);
|
||||
/* Mapping ittiam memtabs to App. memtabs */
|
||||
isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
j += i4_num_memtab;
|
||||
|
||||
/* Get the total number of memtabs used by Time stamp Module */
|
||||
i4_num_memtab = ih264e_time_stamp_get_init_free_memtab(
|
||||
(time_stamp_t **) refptr1[RC_MEM_TIME_STAMP], NULL, GET_NUM_MEMTAB);
|
||||
/* Few extra steps during init */
|
||||
isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
/* Fill the memtabs used by Time Stamp Module */
|
||||
i4_num_memtab = ih264e_time_stamp_get_init_free_memtab(
|
||||
(time_stamp_t **) refptr1[RC_MEM_TIME_STAMP], as_itt_memtab + j, e_func_type);
|
||||
/* Mapping ittiam memtabs to App. memtabs */
|
||||
isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
j += i4_num_memtab;
|
||||
|
||||
/* Get the total number of memtabs used by Frame rate Module */
|
||||
i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab(
|
||||
(pd_frm_rate_t **) refptr1[RC_MEM_FRAME_RATE], NULL, GET_NUM_MEMTAB);
|
||||
/* Few extra steps during init */
|
||||
isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
/* Fill the memtabs used by Frame Rate Module */
|
||||
i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab(
|
||||
(pd_frm_rate_t **) refptr1[RC_MEM_FRAME_RATE], as_itt_memtab + j, e_func_type);
|
||||
/* Mapping ittiam memtabs to App. memtabs */
|
||||
isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
j += i4_num_memtab;
|
||||
|
||||
/* Get the total number of memtabs used by Rate Controller */
|
||||
i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
|
||||
(rate_control_api_t **) refptr1[RC_MEM_API_L0], NULL, GET_NUM_MEMTAB);
|
||||
/* Few extra steps during init */
|
||||
isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
/* Fill the memtabs used by Rate Controller */
|
||||
i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
|
||||
(rate_control_api_t **) refptr1[RC_MEM_API_L0], as_itt_memtab + j, e_func_type);
|
||||
/* Mapping ittiam memtabs to App. memtabs */
|
||||
isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
j += i4_num_memtab;
|
||||
|
||||
/* Get the total number of memtabs used by Rate Controller */
|
||||
i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
|
||||
(rate_control_api_t **) refptr1[RC_MEM_API_L1], NULL, GET_NUM_MEMTAB);
|
||||
/* Few extra steps during init */
|
||||
isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
/* Fill the memtabs used by Rate Controller */
|
||||
i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
|
||||
(rate_control_api_t **) refptr1[RC_MEM_API_L1], as_itt_memtab + j, e_func_type);
|
||||
/* Mapping ittiam memtabs to App. memtabs */
|
||||
isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
j += i4_num_memtab;
|
||||
|
||||
/* Get the total number of memtabs used by Rate Controller */
|
||||
i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
|
||||
(rate_control_api_t **) refptr1[RC_MEM_API_L2], NULL, GET_NUM_MEMTAB);
|
||||
/* Few extra steps during init */
|
||||
isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
/* Fill the memtabs used by Rate Controller */
|
||||
i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
|
||||
(rate_control_api_t **) refptr1[RC_MEM_API_L2], as_itt_memtab + j, e_func_type);
|
||||
/* Mapping ittiam memtabs to App. memtabs */
|
||||
isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
|
||||
j += i4_num_memtab;
|
||||
|
||||
return j; /* Total MemTabs Needed by Rate Control Module */
|
||||
}
|
||||
77
encoder/svc/isvce_rc_mem_interface.h
Normal file
77
encoder/svc/isvce_rc_mem_interface.h
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file
|
||||
* isvce_rc_mem_interface.h
|
||||
*
|
||||
* @brief
|
||||
* This file contains function declaration and structures for rate control
|
||||
* memtabs
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @remarks
|
||||
* The rate control library is a global library across various codecs. It
|
||||
* anticipates certain structures definitions. Those definitions are to be
|
||||
* imported from global workspace. Instead of that, the structures needed for
|
||||
* rc library are copied in to this file and exported to rc library. If the
|
||||
* structures / enums / ... in the global workspace change, this file also needs
|
||||
* to be modified accordingly.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
#ifndef _ISVCE_RC_MEM_INTERFACE_H_
|
||||
#define _ISVCE_RC_MEM_INTERFACE_H_
|
||||
|
||||
#include "ih264e_rc_mem_interface.h"
|
||||
|
||||
/**
 ***************************************************************************
 * Enum identifying the mem records in RC.
 *
 * Enumerators are consecutive and start at 0, so RC_MEM_CNT evaluates to the
 * number of enumerators listed before it.
 ****************************************************************************
 */
typedef enum RC_MEM_TYPES_T
{
    RC_MEM_FRAME_TIME = 0,
    RC_MEM_TIME_STAMP,
    RC_MEM_FRAME_RATE,
    RC_MEM_API_L0,
    RC_MEM_API_L1,
    RC_MEM_API_L2,

    /* Count of the entries above. Do not add anything below */
    RC_MEM_CNT
} RC_MEM_TYPES_T;
|
||||
|
||||
extern WORD32 isvce_get_rate_control_mem_tab(void *pv_rate_control, iv_mem_rec_t *ps_mem,
|
||||
ITT_FUNC_TYPE_E e_func_type);
|
||||
|
||||
#endif
|
||||
286
encoder/svc/isvce_rc_utils.c
Normal file
286
encoder/svc/isvce_rc_utils.c
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_rc_utils.c
|
||||
*
|
||||
* @brief
|
||||
* Contains get gpp function required by the SVC encoder
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_get_gpp()
|
||||
* - isvce_rc_utils_init()
|
||||
* - isvce_get_rc_utils_data_size()
|
||||
* - isvce_compute_gpp()
|
||||
* - isvce_get_gpp_function_selector()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "ih264_macros.h"
|
||||
#include "isvc_structs.h"
|
||||
#include "isvce_rc_utils.h"
|
||||
#include "isvce_rc_utils_private_defs.h"
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* get gpp function
|
||||
*
|
||||
* @par Description:
|
||||
* computes gradient per pixel value for a given frame
|
||||
*
|
||||
* @param[in] ps_input_buf
|
||||
* pointer to yuv buffer properties
|
||||
*
|
||||
* @returns
|
||||
* calculated gpp value
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
static DOUBLE isvce_get_gpp(yuv_buf_props_t *ps_input_buf)
|
||||
{
|
||||
UWORD32 i, j;
|
||||
|
||||
DOUBLE d_gpp_y = 0;
|
||||
DOUBLE d_gpp_u = 0;
|
||||
DOUBLE d_gpp_v = 0;
|
||||
|
||||
DOUBLE d_gpp = 0;
|
||||
|
||||
UWORD32 u4_width = ps_input_buf->u4_width;
|
||||
UWORD32 u4_height = ps_input_buf->u4_height;
|
||||
|
||||
UWORD8 *pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[0].pv_data;
|
||||
WORD32 i4_input_stride = ps_input_buf->as_component_bufs[0].i4_data_stride;
|
||||
|
||||
for(i = 0; i < u4_height - 1; i++)
|
||||
{
|
||||
for(j = 0; j < u4_width - 1; j++)
|
||||
{
|
||||
UWORD8 u1_cur_pix = pu1_input_buf[j];
|
||||
UWORD8 u1_bot_pix = pu1_input_buf[i4_input_stride + j];
|
||||
UWORD8 u1_right_pix = pu1_input_buf[j + 1];
|
||||
|
||||
d_gpp_y += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix));
|
||||
}
|
||||
pu1_input_buf += i4_input_stride;
|
||||
}
|
||||
|
||||
pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[1].pv_data;
|
||||
i4_input_stride = ps_input_buf->as_component_bufs[1].i4_data_stride;
|
||||
|
||||
for(i = 0; i < (u4_height >> 1) - 1; i++)
|
||||
{
|
||||
for(j = 0; j < u4_width - 2; j += 2)
|
||||
{
|
||||
UWORD8 u1_cur_pix = pu1_input_buf[j];
|
||||
UWORD8 u1_bot_pix = pu1_input_buf[i4_input_stride + j];
|
||||
UWORD8 u1_right_pix = pu1_input_buf[j + 2];
|
||||
|
||||
d_gpp_u += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix));
|
||||
|
||||
u1_cur_pix = pu1_input_buf[j + 1];
|
||||
u1_bot_pix = pu1_input_buf[i4_input_stride + j + 1];
|
||||
u1_right_pix = pu1_input_buf[j + 2 + 1];
|
||||
|
||||
d_gpp_v += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix));
|
||||
}
|
||||
pu1_input_buf += i4_input_stride;
|
||||
}
|
||||
|
||||
d_gpp_y /= (u4_width * u4_height);
|
||||
d_gpp_u /= ((u4_width >> 1) * (u4_height >> 1));
|
||||
d_gpp_v /= ((u4_width >> 1) * (u4_height >> 1));
|
||||
|
||||
d_gpp = (DOUBLE) ((4 * d_gpp_y) + d_gpp_u + d_gpp_v) / 6;
|
||||
|
||||
return d_gpp;
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* gets the memory size required for compute gpp
|
||||
*
|
||||
* @par Description:
|
||||
* returns the memory required by the rc utils context and state structs
|
||||
* for allocation.
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
UWORD32 isvce_get_rc_utils_data_size() { return sizeof(svc_rc_utils_state_t); }
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* compute gpp process
|
||||
*
|
||||
* @par Description:
|
||||
* calls the function to compute gpp
|
||||
*
|
||||
* @param[in] ps_svc_rc_utils_ctxt
|
||||
* pointer to svc rc utils context
|
||||
*
|
||||
* @param[in] ps_input_buf
|
||||
* pointer to yuv buffer properties
|
||||
*
|
||||
* @returns
|
||||
* calculated gpp value
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
DOUBLE isvce_compute_gpp(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, yuv_buf_props_t *ps_input_buf)
|
||||
{
|
||||
svc_rc_utils_state_t *ps_rc_utils_state =
|
||||
(svc_rc_utils_state_t *) ps_svc_rc_utils_ctxt->pv_rc_utils_state;
|
||||
|
||||
return ps_rc_utils_state->pf_get_gpp(ps_input_buf);
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* selects which function to call for get gpp based on e_arch
|
||||
*
|
||||
* @par Description:
|
||||
*
|
||||
* @param[in] ps_rc_utils_state
|
||||
* pointer to svc rc utils state
|
||||
*
|
||||
* @param[in] e_arch
|
||||
* architecure type
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
static void isvce_get_gpp_function_selector(svc_rc_utils_state_t *ps_rc_utils_state,
|
||||
IV_ARCH_T e_arch)
|
||||
{
|
||||
switch(e_arch)
|
||||
{
|
||||
#if defined(X86)
|
||||
case ARCH_X86_SSE42:
|
||||
{
|
||||
ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_sse42;
|
||||
|
||||
break;
|
||||
}
|
||||
#elif defined(ARMV8)
|
||||
case ARCH_ARM_A53:
|
||||
case ARCH_ARM_A57:
|
||||
case ARCH_ARM_V8_NEON:
|
||||
{
|
||||
ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_neon;
|
||||
|
||||
break;
|
||||
}
|
||||
#elif !defined(DISABLE_NEON)
|
||||
case ARCH_ARM_A9Q:
|
||||
case ARCH_ARM_A9A:
|
||||
case ARCH_ARM_A9:
|
||||
case ARCH_ARM_A7:
|
||||
case ARCH_ARM_A5:
|
||||
case ARCH_ARM_A15:
|
||||
{
|
||||
ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_neon;
|
||||
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
{
|
||||
ps_rc_utils_state->pf_get_gpp = isvce_get_gpp;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* initializes the rc utils context
|
||||
*
|
||||
* @par Description:
|
||||
* initializes the rc utils context
|
||||
*
|
||||
* @param[in] ps_svc_rc_utils_ctxt
|
||||
* pointer to svc rc utils context
|
||||
*
|
||||
* @param[in] ps_mem_rec
|
||||
* pointer to memory allocated to compute gpp process
|
||||
*
|
||||
* @param[in] e_arch
|
||||
* architecure type
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
void isvce_rc_utils_init(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, iv_mem_rec_t *ps_mem_rec,
|
||||
IV_ARCH_T e_arch)
|
||||
{
|
||||
svc_rc_utils_state_t *ps_rc_utils_state;
|
||||
|
||||
UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
|
||||
|
||||
ps_rc_utils_state = (svc_rc_utils_state_t *) pu1_buf;
|
||||
|
||||
ps_svc_rc_utils_ctxt->pv_rc_utils_state = ps_rc_utils_state;
|
||||
|
||||
isvce_get_gpp_function_selector(ps_rc_utils_state, e_arch);
|
||||
}
|
||||
134
encoder/svc/isvce_rc_utils.h
Normal file
134
encoder/svc/isvce_rc_utils.h
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @file
|
||||
* isvce_rc_utils.h
|
||||
*
|
||||
* @brief
|
||||
* Contains get gpp function required by the SVC encoder
|
||||
*
|
||||
* @author
|
||||
* ittiam
|
||||
*
|
||||
* @par List of Functions:
|
||||
* - isvce_rc_utils_init()
|
||||
* - isvce_get_rc_utils_data_size()
|
||||
* - isvce_compute_gpp()
|
||||
*
|
||||
* @remarks
|
||||
* None
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ISVCE_RC_UTILS_H_
|
||||
#define _ISVCE_RC_UTILS_H_
|
||||
|
||||
#include "ih264_typedefs.h"
|
||||
#include "isvc_structs.h"
|
||||
|
||||
typedef struct
{
    /* Opaque handle to the rc utils state; the type behind it is private */
    /* to the rc utils implementation                                      */
    void *pv_rc_utils_state;
} svc_rc_utils_ctxt_t;
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* initializes the rc utils context
|
||||
*
|
||||
* @par Description:
|
||||
* initializes the rc utils context
|
||||
*
|
||||
* @param[in] ps_svc_rc_utils_ctxt
|
||||
* pointer to svc rc utils context
|
||||
*
|
||||
* @param[in] ps_mem_rec
|
||||
* pointer to memory allocated to compute gpp process
|
||||
*
|
||||
* @param[in] e_arch
|
||||
* architecure type
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
extern void isvce_rc_utils_init(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, iv_mem_rec_t *ps_mem_rec,
|
||||
IV_ARCH_T e_arch);
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* gets the memory size required for compute gpp
|
||||
*
|
||||
* @par Description:
|
||||
* returns the memory required by the rc utils context and state structs
|
||||
* for allocation.
|
||||
*
|
||||
* @returns
|
||||
*
|
||||
* @remarks
|
||||
*
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
extern UWORD32 isvce_get_rc_utils_data_size();
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
*
|
||||
* @brief
|
||||
* compute gpp process
|
||||
*
|
||||
* @par Description:
|
||||
* calls the function to compute gpp
|
||||
*
|
||||
* @param[in] ps_svc_rc_utils_ctxt
|
||||
* pointer to svc rc utils context
|
||||
*
|
||||
* @param[in] ps_input_buf
|
||||
* pointer to yuv buffer properties
|
||||
*
|
||||
* @returns
|
||||
* calculated gpp value
|
||||
*
|
||||
* @remarks
|
||||
* none
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
extern DOUBLE isvce_compute_gpp(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt,
|
||||
yuv_buf_props_t *ps_input_buf);
|
||||
|
||||
#endif
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue