From bb0f31cb6beeeadd7d4a36c64475563e0cddbd67 Mon Sep 17 00:00:00 2001 From: Ashwin Natesan Date: Mon, 30 Jan 2023 14:16:47 +0530 Subject: [PATCH] Encoder: SVC encoding support added Added support for encoding 'Scalable Baseline' profile, corresponding to profile_idc of 83 in 'Rec. ITU-T H.264 (11/2007)'. Bug: 248891908 Test: svcenc -c enc.cfg Change-Id: Ib12ca4c4a8c0e674738ae2af01558a08cefe0929 --- Android.bp | 348 +- CMakeLists.txt | 24 +- common/arm/svc/isvc_intra_sampling_neon.c | 485 ++ .../arm/svc/isvc_iquant_itrans_recon_neon.c | 1783 +++++ common/arm/svc/isvc_mem_fns_neon.c | 151 + common/arm/svc/isvc_resi_trans_quant_neon.c | 1085 +++ common/ih264_cabac_tables.h | 9 +- common/ih264_defs.h | 85 +- common/ih264_size_defs.h | 2 + common/svc/isvc_cabac_tables.c | 6542 +++++++++++++++++ common/svc/isvc_cabac_tables.h | 57 + common/svc/isvc_common_tables.c | 81 + common/svc/isvc_common_tables.h | 50 + common/svc/isvc_defs.h | 88 + common/svc/isvc_inter_pred_filters.h | 219 + common/svc/isvc_intra_resample.c | 3257 ++++++++ common/svc/isvc_intra_resample.h | 251 + common/svc/isvc_iquant_itrans_recon.c | 1094 +++ common/svc/isvc_macros.h | 37 + common/svc/isvc_mem_fns.c | 317 + common/svc/isvc_mem_fns.h | 109 + common/svc/isvc_resi_trans_quant.c | 840 +++ common/svc/isvc_structs.h | 335 + common/svc/isvc_trans_quant_itrans_iquant.h | 253 + common/svccommon.cmake | 39 + common/x86/svc/isvc_intra_resample_sse42.c | 658 ++ .../svc/isvc_iquant_itrans_recon_dc_ssse3.c | 548 ++ .../x86/svc/isvc_iquant_itrans_recon_sse42.c | 2849 +++++++ .../x86/svc/isvc_iquant_itrans_recon_ssse3.c | 1291 ++++ common/x86/svc/isvc_mem_fns_sse42.c | 157 + common/x86/svc/isvc_mem_fns_ssse3.c | 435 ++ common/x86/svc/isvc_padding_ssse3.c | 294 + common/x86/svc/isvc_resi_trans_quant_sse42.c | 1881 +++++ encoder/arm/svc/isvce_downscaler_neon.c | 927 +++ encoder/arm/svc/isvce_function_selector.c | 157 + encoder/arm/svc/isvce_function_selector_a9q.c | 270 + 
encoder/arm/svc/isvce_function_selector_av8.c | 278 + encoder/arm/svc/isvce_platform_macros.h | 139 + encoder/arm/svc/isvce_rc_utils_neon.c | 625 ++ encoder/arm/svc/isvce_residual_pred_neon.c | 666 ++ encoder/irc_rate_control_api_structs.h | 9 +- encoder/riscv/svc/isvce_function_selector.c | 80 + encoder/riscv/svc/isvce_platform_macros.h | 103 + encoder/svc/irc_svc_rate_control_api.c | 116 + encoder/svc/irc_svc_rate_control_api.h | 46 + encoder/svc/isvce.h | 1023 +++ encoder/svc/isvce_api.c | 6054 +++++++++++++++ encoder/svc/isvce_cabac.c | 753 ++ encoder/svc/isvce_cabac.h | 380 + encoder/svc/isvce_cabac_encode.c | 2374 ++++++ encoder/svc/isvce_cabac_init.c | 215 + encoder/svc/isvce_cabac_structs.h | 142 + encoder/svc/isvce_cabac_utils.h | 88 + encoder/svc/isvce_cavlc.c | 2021 +++++ encoder/svc/isvce_cavlc.h | 126 + encoder/svc/isvce_core_coding.c | 2367 ++++++ encoder/svc/isvce_core_coding.h | 125 + encoder/svc/isvce_deblk.c | 1267 ++++ encoder/svc/isvce_deblk.h | 53 + encoder/svc/isvce_defs.h | 345 + encoder/svc/isvce_downscaler.c | 537 ++ encoder/svc/isvce_downscaler.h | 205 + encoder/svc/isvce_downscaler_private_defs.h | 124 + encoder/svc/isvce_encode.c | 790 ++ encoder/svc/isvce_encode.h | 41 + encoder/svc/isvce_encode_header.c | 2127 ++++++ encoder/svc/isvce_encode_header.h | 296 + encoder/svc/isvce_error.h | 70 + encoder/svc/isvce_fmt_conv.c | 145 + encoder/svc/isvce_fmt_conv.h | 48 + encoder/svc/isvce_function_selector_generic.c | 314 + encoder/svc/isvce_globals.c | 48 + encoder/svc/isvce_globals.h | 44 + encoder/svc/isvce_ibl_eval.c | 1378 ++++ encoder/svc/isvce_ibl_eval.h | 105 + encoder/svc/isvce_ibl_private_defs.h | 94 + encoder/svc/isvce_ilp_mv.c | 737 ++ encoder/svc/isvce_ilp_mv.h | 115 + encoder/svc/isvce_ilp_mv_private_defs.h | 68 + encoder/svc/isvce_ilp_mv_utils.h | 111 + encoder/svc/isvce_interface_structs.h | 116 + encoder/svc/isvce_intra_modes_eval.c | 2334 ++++++ encoder/svc/isvce_intra_modes_eval.h | 361 + encoder/svc/isvce_mc.c | 480 ++ 
encoder/svc/isvce_mc.h | 87 + encoder/svc/isvce_me.c | 2924 ++++++++ encoder/svc/isvce_me.h | 381 + encoder/svc/isvce_mode_stat_visualiser.c | 191 + encoder/svc/isvce_mode_stat_visualiser.h | 72 + encoder/svc/isvce_nalu_stat_aggregator.c | 124 + encoder/svc/isvce_nalu_stat_aggregator.h | 99 + encoder/svc/isvce_pred_structs.h | 156 + encoder/svc/isvce_process.c | 2794 +++++++ encoder/svc/isvce_process.h | 285 + encoder/svc/isvce_rate_control.c | 716 ++ encoder/svc/isvce_rate_control.h | 330 + encoder/svc/isvce_rc_mem_interface.c | 325 + encoder/svc/isvce_rc_mem_interface.h | 77 + encoder/svc/isvce_rc_utils.c | 286 + encoder/svc/isvce_rc_utils.h | 134 + encoder/svc/isvce_rc_utils_private_defs.h | 52 + encoder/svc/isvce_res_pred_private_defs.h | 124 + encoder/svc/isvce_residual_pred.c | 1950 +++++ encoder/svc/isvce_residual_pred.h | 97 + encoder/svc/isvce_structs.h | 2584 +++++++ encoder/svc/isvce_sub_pic_rc.c | 906 +++ encoder/svc/isvce_sub_pic_rc.h | 131 + encoder/svc/isvce_sub_pic_rc_private_defs.h | 256 + encoder/svc/isvce_utils.c | 4542 ++++++++++++ encoder/svc/isvce_utils.h | 234 + encoder/svc/libsvcenc.cmake | 127 + encoder/x86/svc/isvce_downscaler_sse42.c | 652 ++ encoder/x86/svc/isvce_function_selector.c | 136 + .../x86/svc/isvce_function_selector_sse42.c | 169 + .../x86/svc/isvce_function_selector_ssse3.c | 182 + encoder/x86/svc/isvce_platform_macros.h | 119 + encoder/x86/svc/isvce_rc_utils_sse42.c | 450 ++ encoder/x86/svc/isvce_residual_pred_sse42.c | 735 ++ fuzzer/Android.bp | 42 +- fuzzer/svc_enc_fuzzer.cmake | 2 + fuzzer/svc_enc_fuzzer.cpp | 1343 ++++ libavc_blocklist.txt | 1 + test/Android.bp | 56 +- test/svcenc/app.h | 417 ++ test/svcenc/enc.cfg | 47 + test/svcenc/input.c | 300 + test/svcenc/main.c | 3253 ++++++++ test/svcenc/output.c | 95 + test/svcenc/psnr.c | 245 + test/svcenc/psnr.h | 58 + test/svcenc/recon.c | 215 + test/svcenc/svcenc.cmake | 12 + 132 files changed, 85450 insertions(+), 154 deletions(-) create mode 100644 
common/arm/svc/isvc_intra_sampling_neon.c create mode 100644 common/arm/svc/isvc_iquant_itrans_recon_neon.c create mode 100644 common/arm/svc/isvc_mem_fns_neon.c create mode 100644 common/arm/svc/isvc_resi_trans_quant_neon.c create mode 100644 common/svc/isvc_cabac_tables.c create mode 100644 common/svc/isvc_cabac_tables.h create mode 100644 common/svc/isvc_common_tables.c create mode 100644 common/svc/isvc_common_tables.h create mode 100644 common/svc/isvc_defs.h create mode 100644 common/svc/isvc_inter_pred_filters.h create mode 100644 common/svc/isvc_intra_resample.c create mode 100644 common/svc/isvc_intra_resample.h create mode 100644 common/svc/isvc_iquant_itrans_recon.c create mode 100644 common/svc/isvc_macros.h create mode 100644 common/svc/isvc_mem_fns.c create mode 100644 common/svc/isvc_mem_fns.h create mode 100644 common/svc/isvc_resi_trans_quant.c create mode 100644 common/svc/isvc_structs.h create mode 100644 common/svc/isvc_trans_quant_itrans_iquant.h create mode 100644 common/svccommon.cmake create mode 100644 common/x86/svc/isvc_intra_resample_sse42.c create mode 100644 common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c create mode 100644 common/x86/svc/isvc_iquant_itrans_recon_sse42.c create mode 100644 common/x86/svc/isvc_iquant_itrans_recon_ssse3.c create mode 100644 common/x86/svc/isvc_mem_fns_sse42.c create mode 100644 common/x86/svc/isvc_mem_fns_ssse3.c create mode 100644 common/x86/svc/isvc_padding_ssse3.c create mode 100644 common/x86/svc/isvc_resi_trans_quant_sse42.c create mode 100644 encoder/arm/svc/isvce_downscaler_neon.c create mode 100644 encoder/arm/svc/isvce_function_selector.c create mode 100644 encoder/arm/svc/isvce_function_selector_a9q.c create mode 100644 encoder/arm/svc/isvce_function_selector_av8.c create mode 100644 encoder/arm/svc/isvce_platform_macros.h create mode 100644 encoder/arm/svc/isvce_rc_utils_neon.c create mode 100644 encoder/arm/svc/isvce_residual_pred_neon.c create mode 100644 
encoder/riscv/svc/isvce_function_selector.c create mode 100644 encoder/riscv/svc/isvce_platform_macros.h create mode 100644 encoder/svc/irc_svc_rate_control_api.c create mode 100644 encoder/svc/irc_svc_rate_control_api.h create mode 100644 encoder/svc/isvce.h create mode 100644 encoder/svc/isvce_api.c create mode 100644 encoder/svc/isvce_cabac.c create mode 100644 encoder/svc/isvce_cabac.h create mode 100644 encoder/svc/isvce_cabac_encode.c create mode 100644 encoder/svc/isvce_cabac_init.c create mode 100644 encoder/svc/isvce_cabac_structs.h create mode 100644 encoder/svc/isvce_cabac_utils.h create mode 100644 encoder/svc/isvce_cavlc.c create mode 100644 encoder/svc/isvce_cavlc.h create mode 100644 encoder/svc/isvce_core_coding.c create mode 100644 encoder/svc/isvce_core_coding.h create mode 100644 encoder/svc/isvce_deblk.c create mode 100644 encoder/svc/isvce_deblk.h create mode 100644 encoder/svc/isvce_defs.h create mode 100644 encoder/svc/isvce_downscaler.c create mode 100644 encoder/svc/isvce_downscaler.h create mode 100644 encoder/svc/isvce_downscaler_private_defs.h create mode 100644 encoder/svc/isvce_encode.c create mode 100644 encoder/svc/isvce_encode.h create mode 100644 encoder/svc/isvce_encode_header.c create mode 100644 encoder/svc/isvce_encode_header.h create mode 100644 encoder/svc/isvce_error.h create mode 100644 encoder/svc/isvce_fmt_conv.c create mode 100644 encoder/svc/isvce_fmt_conv.h create mode 100644 encoder/svc/isvce_function_selector_generic.c create mode 100644 encoder/svc/isvce_globals.c create mode 100644 encoder/svc/isvce_globals.h create mode 100644 encoder/svc/isvce_ibl_eval.c create mode 100644 encoder/svc/isvce_ibl_eval.h create mode 100644 encoder/svc/isvce_ibl_private_defs.h create mode 100644 encoder/svc/isvce_ilp_mv.c create mode 100644 encoder/svc/isvce_ilp_mv.h create mode 100644 encoder/svc/isvce_ilp_mv_private_defs.h create mode 100644 encoder/svc/isvce_ilp_mv_utils.h create mode 100644 encoder/svc/isvce_interface_structs.h 
create mode 100644 encoder/svc/isvce_intra_modes_eval.c create mode 100644 encoder/svc/isvce_intra_modes_eval.h create mode 100644 encoder/svc/isvce_mc.c create mode 100644 encoder/svc/isvce_mc.h create mode 100644 encoder/svc/isvce_me.c create mode 100644 encoder/svc/isvce_me.h create mode 100644 encoder/svc/isvce_mode_stat_visualiser.c create mode 100644 encoder/svc/isvce_mode_stat_visualiser.h create mode 100644 encoder/svc/isvce_nalu_stat_aggregator.c create mode 100644 encoder/svc/isvce_nalu_stat_aggregator.h create mode 100644 encoder/svc/isvce_pred_structs.h create mode 100644 encoder/svc/isvce_process.c create mode 100644 encoder/svc/isvce_process.h create mode 100644 encoder/svc/isvce_rate_control.c create mode 100644 encoder/svc/isvce_rate_control.h create mode 100644 encoder/svc/isvce_rc_mem_interface.c create mode 100644 encoder/svc/isvce_rc_mem_interface.h create mode 100644 encoder/svc/isvce_rc_utils.c create mode 100644 encoder/svc/isvce_rc_utils.h create mode 100644 encoder/svc/isvce_rc_utils_private_defs.h create mode 100644 encoder/svc/isvce_res_pred_private_defs.h create mode 100644 encoder/svc/isvce_residual_pred.c create mode 100644 encoder/svc/isvce_residual_pred.h create mode 100644 encoder/svc/isvce_structs.h create mode 100644 encoder/svc/isvce_sub_pic_rc.c create mode 100644 encoder/svc/isvce_sub_pic_rc.h create mode 100644 encoder/svc/isvce_sub_pic_rc_private_defs.h create mode 100644 encoder/svc/isvce_utils.c create mode 100644 encoder/svc/isvce_utils.h create mode 100644 encoder/svc/libsvcenc.cmake create mode 100644 encoder/x86/svc/isvce_downscaler_sse42.c create mode 100644 encoder/x86/svc/isvce_function_selector.c create mode 100644 encoder/x86/svc/isvce_function_selector_sse42.c create mode 100644 encoder/x86/svc/isvce_function_selector_ssse3.c create mode 100644 encoder/x86/svc/isvce_platform_macros.h create mode 100644 encoder/x86/svc/isvce_rc_utils_sse42.c create mode 100644 encoder/x86/svc/isvce_residual_pred_sse42.c create mode 
100644 fuzzer/svc_enc_fuzzer.cmake create mode 100644 fuzzer/svc_enc_fuzzer.cpp create mode 100644 test/svcenc/app.h create mode 100644 test/svcenc/enc.cfg create mode 100644 test/svcenc/input.c create mode 100644 test/svcenc/main.c create mode 100644 test/svcenc/output.c create mode 100644 test/svcenc/psnr.c create mode 100644 test/svcenc/psnr.h create mode 100644 test/svcenc/recon.c create mode 100644 test/svcenc/svcenc.cmake diff --git a/Android.bp b/Android.bp index 5cd62e2..ee57611 100644 --- a/Android.bp +++ b/Android.bp @@ -35,6 +35,17 @@ cc_library_headers { min_sdk_version: "29", } +cc_library_headers { + name: "libsvcenc_headers", + export_include_dirs: [ + "common", + "common/svc", + "encoder", + "encoder/svc" + ], + min_sdk_version: "29", +} + cc_library_headers { name: "libavcenc_headers", export_include_dirs: [ @@ -44,6 +55,106 @@ cc_library_headers { min_sdk_version: "29", } +cc_defaults { + name: "libavc_enc_defaults", + vendor_available: true, + host_supported: true, + shared_libs: [ + "liblog", + "libcutils", + ], + cflags: [ + "-DNDEBUG", + "-UHP_PL", + "-DN_MB_ENABLE", + "-fPIC", + "-O3", + "-Wall", + "-Werror", + "-Wno-error=constant-conversion", + ], + arch: { + arm: { + local_include_dirs: [ + "common/arm", + "encoder/arm", + ], + + cflags: [ + "-DARM", + // These will be overriden by armv7_a_neon + "-DDISABLE_NEON", + ], + + neon: { + cflags: [ + "-UDISABLE_NEON", + ], + }, + }, + + arm64: { + cflags: [ + "-DARMV8", + "-DARM", + ], + local_include_dirs: [ + "common/arm", + "common/armv8", + "encoder/arm", + "encoder/armv8", + ], + }, + + riscv64: { + local_include_dirs: [ + "common/riscv", + "encoder/riscv", + ], + }, + + x86: { + cflags: [ + "-DX86", + "-msse4.2", + ], + + local_include_dirs: [ + "encoder/x86", + "common/x86", + ], + }, + + x86_64: { + cflags: [ + "-DX86", + "-msse4.2", + ], + + local_include_dirs: [ + "encoder/x86", + "common/x86", + ], + }, + }, + + sanitize: { + integer_overflow: true, + misc_undefined: ["bounds"], + 
cfi: true, + config: { + cfi_assembly_support: true, + }, + blocklist: "libavc_blocklist.txt", + }, + + apex_available: [ + "//apex_available:platform", //due to libstagefright_soft_avcenc + "com.android.media.swcodec", + ], + min_sdk_version: "29", +} + cc_defaults { name: "libavc_mvc_dec_defaults", cflags: [ @@ -349,24 +460,7 @@ cc_library_static { cc_library_static { name: "libavcenc", - vendor_available: true, - host_supported: true, - shared_libs: [ - "liblog", - "libcutils", - ], - - cflags: [ - "-DNDEBUG", - "-UHP_PL", - "-DN_MB_ENABLE", - "-fPIC", - - "-O3", - "-Wall", - "-Werror", - "-Wno-error=constant-conversion", - ], + defaults: ["libavc_enc_defaults"], export_include_dirs: [ "common", @@ -435,23 +529,11 @@ cc_library_static { arch: { arm: { - local_include_dirs: [ - "encoder/arm", - "common/arm", - ], - srcs: [ "encoder/arm/ih264e_function_selector.c", "common/arm/ih264_arm_memory_barrier.s", ], - cflags: [ - "-DARM", - - // This will be overriden by armv7_a_neon - "-DDISABLE_NEON", - ], - neon: { srcs: [ "encoder/arm/ih264e_function_selector_a9q.c", @@ -479,25 +561,10 @@ cc_library_static { "encoder/arm/ih264e_fmt_conv.s", "encoder/arm/ime_distortion_metrics_a9q.s", ], - - cflags: [ - "-UDISABLE_NEON", - ], }, }, arm64: { - cflags: [ - "-DARMV8", - "-DARM", - ], - - local_include_dirs: [ - "encoder/arm", - "encoder/armv8", - "common/armv8", - ], - srcs: [ "encoder/arm/ih264e_function_selector.c", "encoder/arm/ih264e_function_selector_av8.c", @@ -525,27 +592,12 @@ cc_library_static { }, riscv64: { - local_include_dirs: [ - "common/riscv", - "encoder/riscv", - ], - srcs: [ "encoder/riscv/ih264e_function_selector.c", ], }, x86: { - cflags: [ - "-DX86", - "-msse4.2", - ], - - local_include_dirs: [ - "encoder/x86", - "common/x86", - ], - srcs: [ "encoder/x86/ih264e_function_selector.c", "encoder/x86/ih264e_function_selector_sse42.c", @@ -571,16 +623,6 @@ cc_library_static { }, x86_64: { - cflags: [ - "-DX86", - "-msse4.2", - ], - - local_include_dirs: [ - 
"encoder/x86", - "common/x86", - ], - srcs: [ "encoder/x86/ih264e_function_selector.c", "encoder/x86/ih264e_function_selector_sse42.c", @@ -605,21 +647,161 @@ cc_library_static { ], }, }, +} - sanitize: { - integer_overflow: true, - misc_undefined: ["bounds"], - cfi: true, - config: { - cfi_assembly_support: true, - }, - blocklist: "libavc_blocklist.txt", - }, - apex_available: [ - "//apex_available:platform", //due to libstagefright_soft_avcenc - "com.android.media.swcodec", +cc_library_static { + name: "libsvcenc", + defaults: ["libavc_enc_defaults"], + whole_static_libs: [ + "libavcenc", ], - min_sdk_version: "29", + + export_include_dirs: [ + "common", + "common/svc", + "encoder", + "encoder/svc", + ], + + srcs: [ + "common/svc/isvc_cabac_tables.c", + "common/svc/isvc_common_tables.c", + "common/svc/isvc_intra_resample.c", + "common/svc/isvc_iquant_itrans_recon.c", + "common/svc/isvc_mem_fns.c", + "common/svc/isvc_resi_trans_quant.c", + "encoder/svc/irc_svc_rate_control_api.c", + "encoder/svc/isvce_api.c", + "encoder/svc/isvce_cabac.c", + "encoder/svc/isvce_cabac_encode.c", + "encoder/svc/isvce_cabac_init.c", + "encoder/svc/isvce_cavlc.c", + "encoder/svc/isvce_core_coding.c", + "encoder/svc/isvce_deblk.c", + "encoder/svc/isvce_downscaler.c", + "encoder/svc/isvce_encode.c", + "encoder/svc/isvce_encode_header.c", + "encoder/svc/isvce_fmt_conv.c", + "encoder/svc/isvce_function_selector_generic.c", + "encoder/svc/isvce_globals.c", + "encoder/svc/isvce_ibl_eval.c", + "encoder/svc/isvce_ilp_mv.c", + "encoder/svc/isvce_intra_modes_eval.c", + "encoder/svc/isvce_mc.c", + "encoder/svc/isvce_me.c", + "encoder/svc/isvce_mode_stat_visualiser.c", + "encoder/svc/isvce_nalu_stat_aggregator.c", + "encoder/svc/isvce_process.c", + "encoder/svc/isvce_rate_control.c", + "encoder/svc/isvce_rc_mem_interface.c", + "encoder/svc/isvce_rc_utils.c", + "encoder/svc/isvce_residual_pred.c", + "encoder/svc/isvce_sub_pic_rc.c", + "encoder/svc/isvce_utils.c", + ], + + arch: { + arm: { + 
local_include_dirs: [ + "common/arm/svc", + "encoder/arm/svc", + ], + + srcs: [ + "encoder/arm/svc/isvce_function_selector.c", + ], + + neon: { + srcs: [ + "encoder/arm/svc/isvce_function_selector_a9q.c", + "common/arm/svc/isvc_intra_sampling_neon.c", + "common/arm/svc/isvc_iquant_itrans_recon_neon.c", + "common/arm/svc/isvc_mem_fns_neon.c", + "common/arm/svc/isvc_resi_trans_quant_neon.c", + "encoder/arm/svc/isvce_downscaler_neon.c", + "encoder/arm/svc/isvce_rc_utils_neon.c", + "encoder/arm/svc/isvce_residual_pred_neon.c", + ], + }, + }, + + arm64: { + local_include_dirs: [ + "common/arm/svc", + "encoder/arm/svc", + ], + + srcs: [ + "encoder/arm/svc/isvce_function_selector.c", + "encoder/arm/svc/isvce_function_selector_av8.c", + "common/arm/svc/isvc_intra_sampling_neon.c", + "common/arm/svc/isvc_iquant_itrans_recon_neon.c", + "common/arm/svc/isvc_mem_fns_neon.c", + "common/arm/svc/isvc_resi_trans_quant_neon.c", + "encoder/arm/svc/isvce_downscaler_neon.c", + "encoder/arm/svc/isvce_rc_utils_neon.c", + "encoder/arm/svc/isvce_residual_pred_neon.c", + ], + }, + + riscv64: { + local_include_dirs: [ + "encoder/riscv/svc", + ], + + srcs: [ + "encoder/riscv/svc/isvce_function_selector.c", + ], + }, + + x86: { + local_include_dirs: [ + "encoder/x86/svc", + "common/x86/svc", + ], + + srcs: [ + "common/x86/svc/isvc_intra_resample_sse42.c", + "common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c", + "common/x86/svc/isvc_iquant_itrans_recon_sse42.c", + "common/x86/svc/isvc_iquant_itrans_recon_ssse3.c", + "common/x86/svc/isvc_mem_fns_sse42.c", + "common/x86/svc/isvc_mem_fns_ssse3.c", + "common/x86/svc/isvc_padding_ssse3.c", + "common/x86/svc/isvc_resi_trans_quant_sse42.c", + "encoder/x86/svc/isvce_downscaler_sse42.c", + "encoder/x86/svc/isvce_function_selector.c", + "encoder/x86/svc/isvce_function_selector_sse42.c", + "encoder/x86/svc/isvce_function_selector_ssse3.c", + "encoder/x86/svc/isvce_rc_utils_sse42.c", + "encoder/x86/svc/isvce_residual_pred_sse42.c", + ], + }, + + x86_64: 
{ + local_include_dirs: [ + "encoder/x86/svc", + "common/x86/svc", + ], + + srcs: [ + "common/x86/svc/isvc_intra_resample_sse42.c", + "common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c", + "common/x86/svc/isvc_iquant_itrans_recon_sse42.c", + "common/x86/svc/isvc_iquant_itrans_recon_ssse3.c", + "common/x86/svc/isvc_mem_fns_sse42.c", + "common/x86/svc/isvc_mem_fns_ssse3.c", + "common/x86/svc/isvc_padding_ssse3.c", + "common/x86/svc/isvc_resi_trans_quant_sse42.c", + "encoder/x86/svc/isvce_downscaler_sse42.c", + "encoder/x86/svc/isvce_function_selector.c", + "encoder/x86/svc/isvce_function_selector_sse42.c", + "encoder/x86/svc/isvce_function_selector_ssse3.c", + "encoder/x86/svc/isvce_rc_utils_sse42.c", + "encoder/x86/svc/isvce_residual_pred_sse42.c", + ], + }, + }, } subdirs = ["test"] diff --git a/CMakeLists.txt b/CMakeLists.txt index 75fa4d4..286b1a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,8 @@ enable_language(ASM) set(AVC_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") set(AVC_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}") +option(ENABLE_MVC "Enables mvcdec builds" OFF) +option(ENABLE_SVC "Enables svcenc and svcdec builds" OFF) if("${AVC_ROOT}" STREQUAL "${AVC_CONFIG_DIR}") message( @@ -36,13 +38,29 @@ libavc_set_link_libraries() include("${AVC_ROOT}/common/common.cmake") include("${AVC_ROOT}/decoder/libavcdec.cmake") -include("${AVC_ROOT}/decoder/mvc/libmvcdec.cmake") +if (${ENABLE_MVC}) + include("${AVC_ROOT}/decoder/mvc/libmvcdec.cmake") +endif() include("${AVC_ROOT}/encoder/libavcenc.cmake") +if (${ENABLE_SVC}) + include("${AVC_ROOT}/common/svccommon.cmake") + include("${AVC_ROOT}/encoder/svc/libsvcenc.cmake") +endif() include("${AVC_ROOT}/test/decoder/avcdec.cmake") -include("${AVC_ROOT}/test/mvcdec/mvcdec.cmake") +if (${ENABLE_MVC}) + include("${AVC_ROOT}/test/mvcdec/mvcdec.cmake") +endif() include("${AVC_ROOT}/test/encoder/avcenc.cmake") +if (${ENABLE_SVC}) + include("${AVC_ROOT}/test/svcenc/svcenc.cmake") +endif() 
include("${AVC_ROOT}/fuzzer/avc_dec_fuzzer.cmake") -include("${AVC_ROOT}/fuzzer/mvc_dec_fuzzer.cmake") +if (${ENABLE_MVC}) + include("${AVC_ROOT}/fuzzer/mvc_dec_fuzzer.cmake") +endif() include("${AVC_ROOT}/fuzzer/avc_enc_fuzzer.cmake") +if (${ENABLE_SVC}) + include("${AVC_ROOT}/fuzzer/svc_enc_fuzzer.cmake") +endif() diff --git a/common/arm/svc/isvc_intra_sampling_neon.c b/common/arm/svc/isvc_intra_sampling_neon.c new file mode 100644 index 0000000..661a21e --- /dev/null +++ b/common/arm/svc/isvc_intra_sampling_neon.c @@ -0,0 +1,485 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +/** + * ******************************************************************************* + * * @file + * isvc_intra_sampling_neon.c + * + * @brief + * neon variants of intra sampling functions used by IBL mode + * + * ******************************************************************************* + */ + +#include +#include + +#include "ih264_typedefs.h" +#include "isvc_intra_resample.h" + +void isvc_interpolate_base_luma_dyadic_neon(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf, + UWORD8 *pu1_out_buf, WORD32 i4_out_stride) +{ + WORD32 i4_y; + WORD16 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3; + WORD32 i4_filt_stride, i4_src_stride; + UWORD8 *pu1_inp = pu1_inp_buf; + UWORD8 *pu1_out = pu1_out_buf; + WORD16 *pi2_tmp = pi2_tmp_filt_buf; + + int16x4_t i4_rslt_vert_16x4_1, i4_rslt_vert_16x4_2; + uint8x8_t i4_samp_vert_8x8_0, i4_samp_vert_8x8_1, i4_samp_vert_8x8_2, i4_samp_vert_8x8_3; + int16x8_t i4_rslt_vert_16x8_0, i4_rslt_vert_16x8_2; + + /* Horizontal interpolation */ + int32x4_t i4_rslt_horz_r0_1, i4_rslt_horz_r1_1, i4_rslt_horz_r0_2, i4_rslt_horz_r1_2; + uint16x4_t i4_rslt_horz_r0_1_tmp, i4_rslt_horz_r1_1_tmp, i4_rslt_horz_r0_2_tmp, + i4_rslt_horz_r1_2_tmp; + uint16x8_t rslt_16x8_t_1, rslt_16x8_t_2; + + int16x4_t i4_samp_horz_16x4_0, i4_samp_horz_16x4_1, i4_samp_horz_16x4_2, i4_samp_horz_16x4_3, + i4_samp_horz_16x4_4; + int16x4_t i4_samp_horz_16x4_5, i4_samp_horz_16x4_6, i4_samp_horz_16x4_7, i4_samp_horz_16x4_8; + int16_t i4_coeff_c0 = -3; + int16_t i4_coeff_c1 = 28; + int16_t i4_coeff_c2 = 8; + int16_t i4_coeff_c3 = -1; + int32x4x2_t i4_rslt_horz_r0_tmp32, i4_rslt_horz_r1_tmp32; + int32x4_t const_512_32x4 = vdupq_n_s32(512); + + /* Filter coefficient values for phase 4 */ + i4_coeff_0 = -3; + i4_coeff_1 = 28; + i4_coeff_2 = 8; + i4_coeff_3 = -1; + + i4_filt_stride = 12; + i4_src_stride = DYADIC_REF_W_Y; + + /* Vertical interpolation */ + { + /* First 64 bits*/ + i4_samp_vert_8x8_0 = vld1_u8((const UWORD8 *) pu1_inp); + pu1_inp += 
i4_src_stride; + i4_samp_vert_8x8_1 = vld1_u8((const UWORD8 *) pu1_inp); + pu1_inp += i4_src_stride; + i4_samp_vert_8x8_2 = vld1_u8((const UWORD8 *) pu1_inp); + pu1_inp += i4_src_stride; + i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp); + pu1_inp += i4_src_stride; + + i4_rslt_vert_16x8_0 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_3); + i4_rslt_vert_16x8_0 = vmlaq_n_s16( + i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_2); + i4_rslt_vert_16x8_0 = vmlaq_n_s16( + i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_1); + i4_rslt_vert_16x8_0 = vmlaq_n_s16( + i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_0); + + vst1q_s16(pi2_tmp, i4_rslt_vert_16x8_0); + pi2_tmp += i4_filt_stride; + + for(i4_y = 1; i4_y < 15; i4_y += 2) + { + i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1; + i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2; + i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3; + i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp); + + i4_rslt_vert_16x8_0 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_0); + i4_rslt_vert_16x8_0 = + vmlaq_n_s16(i4_rslt_vert_16x8_0, + vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_1); + i4_rslt_vert_16x8_0 = + vmlaq_n_s16(i4_rslt_vert_16x8_0, + vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_2); + i4_rslt_vert_16x8_0 = + vmlaq_n_s16(i4_rslt_vert_16x8_0, + vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_3); + + i4_rslt_vert_16x8_2 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_3); + i4_rslt_vert_16x8_2 = + vmlaq_n_s16(i4_rslt_vert_16x8_2, + vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_2); + i4_rslt_vert_16x8_2 = + vmlaq_n_s16(i4_rslt_vert_16x8_2, + vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_1); + i4_rslt_vert_16x8_2 = + vmlaq_n_s16(i4_rslt_vert_16x8_2, + 
vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_0); + + vst1q_s16(pi2_tmp, (i4_rslt_vert_16x8_0)); + pi2_tmp += i4_filt_stride; + vst1q_s16(pi2_tmp, (i4_rslt_vert_16x8_2)); + pi2_tmp += i4_filt_stride; + pu1_inp += i4_src_stride; + } + + /* y = 15, y_phase = 4 */ + i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1; + i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2; + i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3; + i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp); + + i4_rslt_vert_16x8_0 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_0); + i4_rslt_vert_16x8_0 = vmlaq_n_s16( + i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_1); + i4_rslt_vert_16x8_0 = vmlaq_n_s16( + i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_2); + i4_rslt_vert_16x8_0 = vmlaq_n_s16( + i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_3); + + vst1q_s16(pi2_tmp, (i4_rslt_vert_16x8_0)); + } + + { + /* Remaining 32 bits */ + pu1_inp = pu1_inp_buf + 8; + pi2_tmp = pi2_tmp_filt_buf + 8; + + i4_samp_vert_8x8_0 = vld1_u8((const UWORD8 *) pu1_inp); + pu1_inp += i4_src_stride; + i4_samp_vert_8x8_1 = vld1_u8((const UWORD8 *) pu1_inp); + pu1_inp += i4_src_stride; + i4_samp_vert_8x8_2 = vld1_u8((const UWORD8 *) pu1_inp); + pu1_inp += i4_src_stride; + i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp); + pu1_inp += i4_src_stride; + + i4_rslt_vert_16x4_1 = vmul_n_s16( + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_3); + i4_rslt_vert_16x4_1 = vmla_n_s16( + i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))), + i4_coeff_2); + i4_rslt_vert_16x4_1 = vmla_n_s16( + i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))), + i4_coeff_1); + i4_rslt_vert_16x4_1 = vmla_n_s16( + i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))), + i4_coeff_0); + + 
vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_1)); + pi2_tmp += i4_filt_stride; + + for(i4_y = 1; i4_y < 15; i4_y += 2) + { + i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1; + i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2; + i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3; + i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp); + + i4_rslt_vert_16x4_1 = vmul_n_s16( + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_0); + i4_rslt_vert_16x4_1 = vmla_n_s16( + i4_rslt_vert_16x4_1, + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))), i4_coeff_1); + i4_rslt_vert_16x4_1 = vmla_n_s16( + i4_rslt_vert_16x4_1, + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))), i4_coeff_2); + i4_rslt_vert_16x4_1 = vmla_n_s16( + i4_rslt_vert_16x4_1, + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))), i4_coeff_3); + + i4_rslt_vert_16x4_2 = vmul_n_s16( + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_3); + i4_rslt_vert_16x4_2 = vmla_n_s16( + i4_rslt_vert_16x4_2, + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))), i4_coeff_2); + i4_rslt_vert_16x4_2 = vmla_n_s16( + i4_rslt_vert_16x4_2, + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))), i4_coeff_1); + i4_rslt_vert_16x4_2 = vmla_n_s16( + i4_rslt_vert_16x4_2, + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))), i4_coeff_0); + + vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_1)); + pi2_tmp += i4_filt_stride; + vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_2)); + pi2_tmp += i4_filt_stride; + pu1_inp += i4_src_stride; + } + + i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1; + i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2; + i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3; + i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp); + + i4_rslt_vert_16x4_1 = vmul_n_s16( + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_0); + i4_rslt_vert_16x4_1 = vmla_n_s16( + i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))), + 
i4_coeff_1); + i4_rslt_vert_16x4_1 = vmla_n_s16( + i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))), + i4_coeff_2); + i4_rslt_vert_16x4_1 = vmla_n_s16( + i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))), + i4_coeff_3); + + vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_1)); + /* Reinitializing the ptrs */ + pu1_inp = pu1_inp_buf; + pi2_tmp = pi2_tmp_filt_buf; + } + + /* Horizontal interpolation */ + for(i4_y = 0; i4_y < 16; i4_y++) + { + i4_samp_horz_16x4_0 = vld1_s16(pi2_tmp); + i4_samp_horz_16x4_1 = vld1_s16(pi2_tmp + 1); + i4_samp_horz_16x4_2 = vld1_s16(pi2_tmp + 2); + i4_samp_horz_16x4_3 = vld1_s16(pi2_tmp + 3); + i4_samp_horz_16x4_4 = vld1_s16(pi2_tmp + 4); + i4_samp_horz_16x4_5 = vld1_s16(pi2_tmp + 5); + i4_samp_horz_16x4_6 = vld1_s16(pi2_tmp + 6); + i4_samp_horz_16x4_7 = vld1_s16(pi2_tmp + 7); + i4_samp_horz_16x4_8 = vld1_s16(pi2_tmp + 8); + + i4_rslt_horz_r0_1 = + vmull_n_s16(i4_samp_horz_16x4_0, i4_coeff_c3); /* a0c3 a1c3 a2c3 a3c3 */ + i4_rslt_horz_r0_1 = + vmlal_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x4_1, + i4_coeff_c2); /* a0c0+a1c1 a1c0+a2c1 a2c0+a3c1 a3c0+a4c1 */ + i4_rslt_horz_r0_1 = vmlal_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x4_2, i4_coeff_c1); + i4_rslt_horz_r0_1 = vmlal_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x4_3, i4_coeff_c0); + /* i4_rslt_horz_r0_1 : contains res at even pos:0,2,4,6 */ + + i4_rslt_horz_r1_1 = + vmull_n_s16(i4_samp_horz_16x4_1, i4_coeff_c0); /* a0c0 a1c0 a2c0 a3c0 */ + i4_rslt_horz_r1_1 = + vmlal_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x4_2, + i4_coeff_c1); /* a0c0+a1c1 a1c0+a2c1 a2c0+a3c1 a3c0+a4c1 */ + i4_rslt_horz_r1_1 = vmlal_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x4_3, i4_coeff_c2); + i4_rslt_horz_r1_1 = vmlal_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x4_4, i4_coeff_c3); + /* i4_rslt_horz_r1_1 : contains res at odd pos:1,3,5,7 */ + + i4_rslt_horz_r0_2 = + vmull_n_s16(i4_samp_horz_16x4_4, i4_coeff_c3); /* a0c3 a1c3 a2c3 a3c3 */ + i4_rslt_horz_r0_2 = + 
vmlal_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x4_5, + i4_coeff_c2); /* a0c0+a1c1 a1c0+a2c1 a2c0+a3c1 a3c0+a4c1 */ + i4_rslt_horz_r0_2 = vmlal_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x4_6, i4_coeff_c1); + i4_rslt_horz_r0_2 = vmlal_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x4_7, i4_coeff_c0); + /* i4_rslt_horz_r0_2 : contains res at even pos:8,10,12,14 */ + + i4_rslt_horz_r1_2 = + vmull_n_s16(i4_samp_horz_16x4_5, i4_coeff_c0); /* a0c0 a1c0 a2c0 a3c0 */ + i4_rslt_horz_r1_2 = + vmlal_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x4_6, + i4_coeff_c1); /* a0c0+a1c1 a1c0+a2c1 a2c0+a3c1 a3c0+a4c1 */ + i4_rslt_horz_r1_2 = vmlal_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x4_7, i4_coeff_c2); + i4_rslt_horz_r1_2 = vmlal_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x4_8, i4_coeff_c3); + /* i4_rslt_horz_r1_2 : contains res at odd pos:9,11,13,15 */ + + i4_rslt_horz_r0_tmp32 = vzipq_s32(i4_rslt_horz_r0_1, i4_rslt_horz_r1_1); + i4_rslt_horz_r1_tmp32 = vzipq_s32(i4_rslt_horz_r0_2, i4_rslt_horz_r1_2); + + i4_rslt_horz_r0_1 = vaddq_s32(i4_rslt_horz_r0_tmp32.val[0], const_512_32x4); + i4_rslt_horz_r1_1 = vaddq_s32(i4_rslt_horz_r0_tmp32.val[1], const_512_32x4); + i4_rslt_horz_r0_2 = vaddq_s32(i4_rslt_horz_r1_tmp32.val[0], const_512_32x4); + i4_rslt_horz_r1_2 = vaddq_s32(i4_rslt_horz_r1_tmp32.val[1], const_512_32x4); + + i4_rslt_horz_r0_1_tmp = vqshrun_n_s32(i4_rslt_horz_r0_1, 10); + i4_rslt_horz_r1_1_tmp = vqshrun_n_s32(i4_rslt_horz_r1_1, 10); + + i4_rslt_horz_r0_2_tmp = vqshrun_n_s32(i4_rslt_horz_r0_2, 10); + i4_rslt_horz_r1_2_tmp = vqshrun_n_s32(i4_rslt_horz_r1_2, 10); + + rslt_16x8_t_1 = vcombine_u16(i4_rslt_horz_r0_1_tmp, i4_rslt_horz_r1_1_tmp); + rslt_16x8_t_2 = vcombine_u16(i4_rslt_horz_r0_2_tmp, i4_rslt_horz_r1_2_tmp); + + vst1_u8(pu1_out, vqmovn_u16(rslt_16x8_t_1)); + vst1_u8(pu1_out + 8, vqmovn_u16(rslt_16x8_t_2)); + + pu1_out += i4_out_stride; + pi2_tmp += i4_filt_stride; + } +} + /** + * Horizontal pass of the dyadic chroma interpolation (NEON). + * + * Filters 8 rows of the WORD16 intermediate buffer (row pitch fixed at 6 + * elements), two rows per loop iteration, into 8 samples per row: + * out[2i] = a[i]*(16 - i4_phase_0) + a[i+1]*i4_phase_0 + * out[2i+1] = a[i+1]*(16 - i4_phase_1) + a[i+2]*i4_phase_1 + * each followed by a rounding shift right by 8. The low byte of every 16-bit + * result lane is merged into the destination; the other byte of each pair + * keeps the value already present in pu1_out_buf (16 bytes per row are read + * and written back). + * + * @param pi2_tmp_filt_buf intermediate samples to filter + * @param pu1_out_buf destination buffer, read-modify-written + * @param i4_out_stride destination row pitch in bytes + * @param i4_phase_0 weight of the second tap for even outputs + * @param i4_phase_1 weight of the second tap for odd outputs + * + * NOTE(review): every vld1q_s16 fetches 8 elements while rows are 6 apart, + * so the loads for the last row reach beyond element 47 - confirm that the + * caller's buffer is large enough. + */ +void isvc_horz_interpol_chroma_dyadic_neon(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf, + WORD32 i4_out_stride,
WORD32 i4_phase_0, + WORD32 i4_phase_1) +{ + WORD32 i4_y; + WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3; + UWORD8 *pu1_out = pu1_out_buf; + WORD16 *pi2_tmp = pi2_tmp_filt_buf; + WORD32 i4_filt_stride = 6; /* row pitch of pi2_tmp_filt_buf, in WORD16 elements */ + WORD32 i4_dst_stride = i4_out_stride; + + int16x8_t i4_samp_horz_16x8_r0_0, i4_samp_horz_16x8_r0_1, i4_samp_horz_16x8_r0_2; + int16x8_t i4_samp_horz_16x8_r1_0, i4_samp_horz_16x8_r1_1, i4_samp_horz_16x8_r1_2; + int16x8_t i4_rslt_horz_r0_1, i4_rslt_horz_r0_2; + int16x8_t i4_rslt_horz_r1_1, i4_rslt_horz_r1_2; + + int16x8x2_t temp_horz_16x8_r0; + int16x8x2_t temp_horz_16x8_r1; + int16x8_t final_horz_16x8_r0_1; + int16x8_t final_horz_16x8_r1_1; + + uint8x16_t i4_out_horz_8x16_r0, i4_out_horz_8x16_r1; + uint8x16_t chroma_mask_8x16 = vreinterpretq_u8_u16(vdupq_n_u16(0x00ff)); /* 0x00ff in every 16-bit lane: vbslq_u8 takes the low byte of each lane from the new result and the high byte from the existing output */ + + i4_coeff_0 = 16 - i4_phase_0; + i4_coeff_1 = i4_phase_0; + i4_coeff_2 = 16 - i4_phase_1; + i4_coeff_3 = i4_phase_1; /* taps: (coeff_0, coeff_1) for even outputs, (coeff_2, coeff_3) for odd outputs; each pair sums to 16 */ + + /* Horizontal interpolation: two rows per iteration, 8 rows in total */ + for(i4_y = 0; i4_y < 8; i4_y += 2) + { + i4_samp_horz_16x8_r0_0 = vld1q_s16(pi2_tmp); /* a0 a1 a2 a3 a4 a5 a6 a7 */ + i4_samp_horz_16x8_r0_1 = vld1q_s16(pi2_tmp + 1); /* a1 a2 a3 a4 */ + i4_samp_horz_16x8_r0_2 = vld1q_s16(pi2_tmp + 2); /* a2 a3 a4 a5 */ + + i4_samp_horz_16x8_r1_0 = vld1q_s16(pi2_tmp + i4_filt_stride); + i4_samp_horz_16x8_r1_1 = vld1q_s16(pi2_tmp + i4_filt_stride + 1); + i4_samp_horz_16x8_r1_2 = vld1q_s16(pi2_tmp + (i4_filt_stride + 2)); + + i4_rslt_horz_r0_1 = + vmulq_n_s16(i4_samp_horz_16x8_r0_0, i4_coeff_0); /* a0c0 a1c0 a2c0 a3c0 */ + i4_rslt_horz_r0_2 = + vmulq_n_s16(i4_samp_horz_16x8_r0_1, i4_coeff_2); /* a1c2 a2c2 a3c2 a4c2 */ + + i4_rslt_horz_r0_1 = vmlaq_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x8_r0_1, + i4_coeff_1); /* a0c0+a1c1 a1c0+a2c1 a2c0+a3c1 a3c0+a4c1 */ + i4_rslt_horz_r0_2 = vmlaq_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x8_r0_2, + i4_coeff_3); /* a1c2+a2c3 a2c2+a3c3 a3c2+a4c3 a4c2+a5c3 */ + + i4_rslt_horz_r1_1 = vmulq_n_s16(i4_samp_horz_16x8_r1_0, i4_coeff_0); + i4_rslt_horz_r1_2 =
vmulq_n_s16(i4_samp_horz_16x8_r1_1, i4_coeff_2); + + i4_rslt_horz_r1_1 = vmlaq_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x8_r1_1, i4_coeff_1); + i4_rslt_horz_r1_2 = vmlaq_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x8_r1_2, i4_coeff_3); + + temp_horz_16x8_r0 = vzipq_s16(i4_rslt_horz_r0_1, i4_rslt_horz_r0_2); + temp_horz_16x8_r1 = vzipq_s16(i4_rslt_horz_r1_1, i4_rslt_horz_r1_2); + + final_horz_16x8_r0_1 = temp_horz_16x8_r0.val[0]; + final_horz_16x8_r1_1 = temp_horz_16x8_r1.val[0]; /* val[0] interleaves lanes 0..3 of both results: 8 output samples per row */ + + final_horz_16x8_r0_1 = vrshrq_n_s16(final_horz_16x8_r0_1, 8); + final_horz_16x8_r1_1 = vrshrq_n_s16(final_horz_16x8_r1_1, 8); + + i4_out_horz_8x16_r0 = vld1q_u8(pu1_out); + i4_out_horz_8x16_r1 = vld1q_u8(pu1_out + i4_dst_stride); /* current destination bytes, needed for the byte-wise merge below */ + + i4_out_horz_8x16_r0 = vbslq_u8(chroma_mask_8x16, vreinterpretq_u8_s16(final_horz_16x8_r0_1), + i4_out_horz_8x16_r0); + i4_out_horz_8x16_r1 = vbslq_u8(chroma_mask_8x16, vreinterpretq_u8_s16(final_horz_16x8_r1_1), + i4_out_horz_8x16_r1); + + vst1q_u8(pu1_out, i4_out_horz_8x16_r0); + vst1q_u8(pu1_out + i4_dst_stride, i4_out_horz_8x16_r1); + + /* Incrementing ptr */ + pi2_tmp += (i4_filt_stride << 1); + pu1_out += (i4_dst_stride << 1); + } +} + /** + * Vertical pass of the dyadic chroma interpolation (NEON). + * + * Loads 6 input rows of 8 bytes (row pitch DYADIC_REF_W_C) and produces 8 + * rows of WORD16 sums, written 6 elements apart: + * row[2k] = in[k]*(16 - i4_phase_0) + in[k+1]*i4_phase_0 + * row[2k+1] = in[k+1]*(16 - i4_phase_1) + in[k+2]*i4_phase_1 + * No rounding or shift is applied in this function. + * + * @param pu1_inp_buf first of the 6 input rows + * @param pi2_tmp_filt_buf output; elements 0..47 are written + * @param i4_phase_0 weight of the second row for even output rows + * @param i4_phase_1 weight of the second row for odd output rows + */ +void isvc_vert_interpol_chroma_dyadic_neon(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf, + WORD32 i4_phase_0, WORD32 i4_phase_1) +{ + WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3; + WORD32 i4_src_stride = DYADIC_REF_W_C; + UWORD8 *pu1_inp = pu1_inp_buf; + WORD16 *pi2_tmp = pi2_tmp_filt_buf; + + uint8x8_t i4_samp_vert_8x8_r0, i4_samp_vert_8x8_r1, i4_samp_vert_8x8_r2, i4_samp_vert_8x8_r3, + i4_samp_vert_8x8_r4, i4_samp_vert_8x8_r5; + + int16x8_t i4_rslt_vert_16x8_r0, i4_rslt_vert_16x8_r1, i4_rslt_vert_16x8_r2, + i4_rslt_vert_16x8_r3, i4_rslt_vert_16x8_r4, i4_rslt_vert_16x8_r5, i4_rslt_vert_16x8_r6, + i4_rslt_vert_16x8_r7; + + i4_coeff_0 = 16 - i4_phase_0; + i4_coeff_1 = i4_phase_0; + i4_coeff_2 = 16 - i4_phase_1; + i4_coeff_3 = i4_phase_1; /* taps: (coeff_0, coeff_1) for even output rows, (coeff_2, coeff_3) for odd output rows */ + + /* Vertical interpolation */ + i4_samp_vert_8x8_r0 = vld1_u8(pu1_inp); +
pu1_inp += i4_src_stride; + i4_samp_vert_8x8_r1 = vld1_u8(pu1_inp); + pu1_inp += i4_src_stride; + i4_samp_vert_8x8_r2 = vld1_u8(pu1_inp); + pu1_inp += i4_src_stride; + i4_samp_vert_8x8_r3 = vld1_u8(pu1_inp); + pu1_inp += i4_src_stride; + i4_samp_vert_8x8_r4 = vld1_u8(pu1_inp); + pu1_inp += i4_src_stride; + i4_samp_vert_8x8_r5 = vld1_u8(pu1_inp); + pu1_inp += i4_src_stride; /* pu1_inp is not read again after this point */ + + i4_rslt_vert_16x8_r0 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r0)), i4_coeff_0); + i4_rslt_vert_16x8_r0 = vmlaq_n_s16( + i4_rslt_vert_16x8_r0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r1)), i4_coeff_1); + vst1q_s16(pi2_tmp, i4_rslt_vert_16x8_r0); /* rows are 6 elements apart, so each 8-lane store spills 2 lanes that the next row's store overwrites */ + + i4_rslt_vert_16x8_r1 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r1)), i4_coeff_2); + i4_rslt_vert_16x8_r1 = vmlaq_n_s16( + i4_rslt_vert_16x8_r1, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_3); + vst1q_s16(pi2_tmp + 6, i4_rslt_vert_16x8_r1); + + i4_rslt_vert_16x8_r2 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r1)), i4_coeff_0); + i4_rslt_vert_16x8_r2 = vmlaq_n_s16( + i4_rslt_vert_16x8_r2, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_1); + vst1q_s16(pi2_tmp + 12, i4_rslt_vert_16x8_r2); + + i4_rslt_vert_16x8_r3 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_2); + i4_rslt_vert_16x8_r3 = vmlaq_n_s16( + i4_rslt_vert_16x8_r3, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_3); + vst1q_s16(pi2_tmp + 18, i4_rslt_vert_16x8_r3); + + i4_rslt_vert_16x8_r4 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_0); + i4_rslt_vert_16x8_r4 = vmlaq_n_s16( + i4_rslt_vert_16x8_r4, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_1); + vst1q_s16(pi2_tmp + 24, i4_rslt_vert_16x8_r4); + + i4_rslt_vert_16x8_r5 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_2); + i4_rslt_vert_16x8_r5 = vmlaq_n_s16( + i4_rslt_vert_16x8_r5,
vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r4)), i4_coeff_3); + vst1q_s16(pi2_tmp + 30, i4_rslt_vert_16x8_r5); + + i4_rslt_vert_16x8_r6 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_0); + i4_rslt_vert_16x8_r6 = vmlaq_n_s16( + i4_rslt_vert_16x8_r6, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r4)), i4_coeff_1); + vst1q_s16(pi2_tmp + 36, i4_rslt_vert_16x8_r6); + + i4_rslt_vert_16x8_r7 = + vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r4)), i4_coeff_2); + i4_rslt_vert_16x8_r7 = vmlaq_n_s16( + i4_rslt_vert_16x8_r7, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r5)), i4_coeff_3); + vst1_s16(pi2_tmp + 42, vget_low_s16(i4_rslt_vert_16x8_r7)); + vst1q_lane_s16(pi2_tmp + 46, i4_rslt_vert_16x8_r7, 4); + vst1q_lane_s16(pi2_tmp + 47, i4_rslt_vert_16x8_r7, 5); /* last row: exactly 6 lanes are written (4 + 1 + 1) so the stores end at element 47 */ +} diff --git a/common/arm/svc/isvc_iquant_itrans_recon_neon.c b/common/arm/svc/isvc_iquant_itrans_recon_neon.c new file mode 100644 index 0000000..270adde --- /dev/null +++ b/common/arm/svc/isvc_iquant_itrans_recon_neon.c @@ -0,0 +1,1783 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ +/** + * ******************************************************************************* + * * @file + * isvc_iquant_itrans_recon_neon.c + * + * @brief + * neon variants of inverse transform and quantization functions + * + * ******************************************************************************* + */ +#include /* NOTE(review): the header name is missing from this directive; the NEON vector types and intrinsics used below are normally declared by arm_neon.h - confirm and restore */ + +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" + /** + * Inverse quantisation, 4x4 inverse transform and reconstruction (NEON). + * + * Each of the 16 coefficients is multiplied by the product of its + * pu2_weigh_mat and pu2_iscal_mat entries, offset by a rounding term that is + * non-zero only for u4_qp_div_6 < 4, shifted left by u4_qp_div_6 and narrowed + * with a saturating shift right by 4. Two butterfly passes separated by a + * transpose follow; the result gets a rounding shift right by 6, is clamped + * to [-255, 255], added to the prediction and stored as saturated 8-bit + * pixels (4 per row, 4 rows). + * + * @param ps_src coefficients (16 consecutive WORD16 values) + * @param ps_pred prediction pixels and their stride + * @param ps_res_pred unused + * @param ps_res unused + * @param ps_rec reconstructed output and its stride + * @param ps_iq_it_res_rec_constants source of pu2_iscal_mat, pu2_weigh_mat and u4_qp_div_6 + * @param pi2_tmp receives the 16 transposed first-pass values + * @param pi2_dc_src value written over de-quantised coefficient 0 when i4_iq_start_idx is 1 + * @param i4_iq_start_idx selects the pi2_dc_src substitution when equal to 1 + * @param u1_res_accumulate unused + */ +void isvc_iquant_itrans_recon_4x4_neon(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res_pred, buffer_container_t *ps_res, + buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, + WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, + UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + + int16x4x4_t src_16x4x2; + int16x4x4_t iscal_16x4x2; + int16x4x4_t weigh_16x4x2; + + int16x4_t q0_16x4, q1_16x4, q2_16x4, q3_16x4; + int32x4_t q0_32x4, q1_32x4, q2_32x4, q3_32x4; + int16x4_t rq1_16x4, rq3_16x4; + int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4; + int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4; + int16x4x2_t xx0_16x4x2, xx1_16x4x2; + int32x2x2_t x0_32x2x2, x1_32x2x2; + int16x4_t weigh0_16x4, weigh1_16x4, weigh2_16x4, weigh3_16x4; + + uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in; + int16x8_t pred0, pred1, pred2,
pred3; + int16x8_t resd01_in, resd23_in; + int16x8_t pred01_in, pred23_in; + uint8x8_t pred01_un, pred23_un; + + int16x8_t pos_255_16x8 = vdupq_n_s16(((WORD16) UINT8_MAX)); + int16x8_t neg_255_16x8 = vdupq_n_s16(-((WORD16) UINT8_MAX)); + int32x4_t qp_div_6_32x4 = vdupq_n_s32(u4_qp_div_6); + + WORD16 rnd_factor = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; /* rounding term; zero once u4_qp_div_6 >= 4 */ + int32x4_t rnd_fact = vdupq_n_s32(rnd_factor); + + UNUSED(ps_res); + UNUSED(ps_res_pred); + UNUSED(u1_res_accumulate); + + src_16x4x2 = vld4_s16(pi2_src); /* de-interleaving load: val[k] holds elements k, k+4, k+8 and k+12 */ + iscal_16x4x2 = vld4_s16((const int16_t *) pu2_iscal_mat); + weigh_16x4x2 = vld4_s16((const int16_t *) pu2_weigh_mat); + + weigh0_16x4 = vmul_s16(weigh_16x4x2.val[0], iscal_16x4x2.val[0]); + weigh1_16x4 = vmul_s16(weigh_16x4x2.val[1], iscal_16x4x2.val[1]); + weigh2_16x4 = vmul_s16(weigh_16x4x2.val[2], iscal_16x4x2.val[2]); + weigh3_16x4 = vmul_s16(weigh_16x4x2.val[3], iscal_16x4x2.val[3]); /* combined scale factors, kept in 16 bits */ + + q0_32x4 = vmull_s16(weigh0_16x4, src_16x4x2.val[0]); + q1_32x4 = vmull_s16(weigh1_16x4, src_16x4x2.val[1]); + q2_32x4 = vmull_s16(weigh2_16x4, src_16x4x2.val[2]); + q3_32x4 = vmull_s16(weigh3_16x4, src_16x4x2.val[3]); + + q0_32x4 = vaddq_s32(q0_32x4, rnd_fact); + q1_32x4 = vaddq_s32(q1_32x4, rnd_fact); + q2_32x4 = vaddq_s32(q2_32x4, rnd_fact); + q3_32x4 = vaddq_s32(q3_32x4, rnd_fact); + + q0_32x4 = vshlq_s32(q0_32x4, qp_div_6_32x4); + q1_32x4 = vshlq_s32(q1_32x4, qp_div_6_32x4); + q2_32x4 = vshlq_s32(q2_32x4, qp_div_6_32x4); + q3_32x4 = vshlq_s32(q3_32x4, qp_div_6_32x4); /* left shift by u4_qp_div_6 */ + + q0_16x4 = vqshrn_n_s32(q0_32x4, 4); + q1_16x4 = vqshrn_n_s32(q1_32x4, 4); + q2_16x4 = vqshrn_n_s32(q2_32x4, 4); + q3_16x4 = vqshrn_n_s32(q3_32x4, 4); /* shift right by 4 and saturate to 16 bits */ + + if(i4_iq_start_idx == 1) + { + q0_16x4 = vset_lane_s16(pi2_dc_src[0], q0_16x4, 0); /* coefficient 0 comes from pi2_dc_src instead */ + } + + rq1_16x4 = vshr_n_s16(q1_16x4, 1); + rq3_16x4 = vshr_n_s16(q3_16x4, 1); + + x0_16x4 = vadd_s16(q0_16x4, q2_16x4); + x1_16x4 = vsub_s16(q0_16x4, q2_16x4); + x2_16x4 = vsub_s16(rq1_16x4, q3_16x4); + x3_16x4 = vadd_s16(q1_16x4, rq3_16x4); + + xx0_16x4 =
vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + /* row 0 to row 3: 4x4 transpose built from vtrn_s16 and vzip_s32 */ + xx0_16x4x2 = vtrn_s16(xx0_16x4, xx1_16x4); + xx1_16x4x2 = vtrn_s16(xx2_16x4, xx3_16x4); + x0_32x2x2 = + vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + /* Store Horz transform output into temp */ + vst1_s16(pi2_tmp, x0_16x4); + vst1_s16(pi2_tmp + 4, x1_16x4); + vst1_s16(pi2_tmp + 8, x2_16x4); + vst1_s16(pi2_tmp + 12, x3_16x4); + + /* vertical inverse transform */ + rq1_16x4 = vshr_n_s16(x1_16x4, 1); + rq3_16x4 = vshr_n_s16(x3_16x4, 1); + + xx0_16x4 = vadd_s16(x0_16x4, x2_16x4); + xx1_16x4 = vsub_s16(x0_16x4, x2_16x4); + xx2_16x4 = vsub_s16(rq1_16x4, x3_16x4); + xx3_16x4 = vadd_s16(x1_16x4, rq3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx3_16x4); + x1_16x4 = vadd_s16(xx1_16x4, xx2_16x4); + x2_16x4 = vsub_s16(xx1_16x4, xx2_16x4); + x3_16x4 = vsub_s16(xx0_16x4, xx3_16x4); + + x0_16x4 = vrshr_n_s16(x0_16x4, 6); + x1_16x4 = vrshr_n_s16(x1_16x4, 6); + x2_16x4 = vrshr_n_s16(x2_16x4, 6); + x3_16x4 = vrshr_n_s16(x3_16x4, 6); /* rounding shift right by 6 */ + + resd01_in = vcombine_s16(x0_16x4, x1_16x4); + resd23_in = vcombine_s16(x2_16x4, x3_16x4); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + resd01_in = vmaxq_s16(resd01_in, neg_255_16x8); + resd23_in = vmaxq_s16(resd23_in, neg_255_16x8); + + /* Saturate all values > 255 to 255 and retain the rest as it is */ + resd01_in = vminq_s16(resd01_in, pos_255_16x8); + resd23_in = vminq_s16(resd23_in, pos_255_16x8); + + /* Load pred (vld1_u8 fetches 8 bytes per row) */ + pred0_in = vld1_u8((uint8_t *) pu1_pred); + pred1_in =
vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride)); + pred2_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride << 1)); + pred3_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride * 3)); + + pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in)); + pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in)); + pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in)); + pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in)); + + pred01_in = vcombine_s16(vget_low_s16(pred0), vget_low_s16(pred1)); + pred23_in = vcombine_s16(vget_low_s16(pred2), vget_low_s16(pred3)); /* only the first 4 pixels of each loaded row are kept */ + + /* Out pixel = pred + res */ + pred01_in = vaddq_s16(pred01_in, resd01_in); + pred23_in = vaddq_s16(pred23_in, resd23_in); + + /* Convert to 8 bit unsigned with saturation */ + pred01_un = vqmovun_s16(pred01_in); + pred23_un = vqmovun_s16(pred23_in); + + vst1_lane_u32((uint32_t *) (pu1_out), vreinterpret_u32_u8(pred01_un), 0); + vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride), vreinterpret_u32_u8(pred01_un), 1); + vst1_lane_u32((uint32_t *) (pu1_out + (i4_out_stride << 1)), vreinterpret_u32_u8(pred23_un), 0); + vst1_lane_u32((uint32_t *) (pu1_out + ((i4_out_stride << 1) + i4_out_stride)), + vreinterpret_u32_u8(pred23_un), 1); +} + /** + * Inverse quantisation, 4x4 inverse transform and reconstruction that also + * writes out the residual (NEON). + * + * De-quantisation and the two butterfly passes are followed by a rounding + * shift right by 6. The residual is then clamped to [-255, 255], stored to + * ps_res (4 values per row, rows ps_res->i4_data_stride WORD16 elements + * apart), added to the prediction and stored to ps_rec as saturated 8-bit + * pixels. + * + * @param ps_src coefficients (16 consecutive WORD16 values) + * @param ps_pred prediction pixels and their stride + * @param ps_res_pred unused + * @param ps_res receives the clamped residual + * @param ps_rec reconstructed output and its stride + * @param ps_iq_it_res_rec_constants source of pu2_iscal_mat, pu2_weigh_mat and u4_qp_div_6 + * @param pi2_tmp receives the 16 transposed first-pass values + * @param pi2_dc_src value written over de-quantised coefficient 0 when i4_iq_start_idx is 1 + * @param i4_iq_start_idx selects the pi2_dc_src substitution when equal to 1 + * @param u1_res_accumulate unused + */ +void isvc_iquant_itrans_recon_4x4_with_res_output_neon( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat =
ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + + int16x4x4_t src_16x4x2; + int16x4x4_t iscal_16x4x2; + int16x4x4_t weigh_16x4x2; + + int16x4_t q0_16x4, q1_16x4, q2_16x4, q3_16x4; + int32x4_t q0_32x4, q1_32x4, q2_32x4, q3_32x4; + int16x4_t rq1_16x4, rq3_16x4; + int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4; + int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4; + int16x4x2_t xx0_16x4x2, xx1_16x4x2; + int32x2x2_t x0_32x2x2, x1_32x2x2; + int16x4_t weigh0_16x4, weigh1_16x4, weigh2_16x4, weigh3_16x4; + + uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in; + int16x8_t pred0, pred1, pred2, pred3; + int16x8_t resd01_in, resd23_in; + int16x8_t pred01_in, pred23_in; + uint8x8_t pred01_un, pred23_un; + + int16x4_t pos_255_16x4 = vdup_n_s16(((WORD16) UINT8_MAX)); + int16x4_t neg_255_16x4 = vdup_n_s16(-((WORD16) UINT8_MAX)); + int32x4_t qp_div_6_32x4 = vdupq_n_s32(u4_qp_div_6); + + WORD16 rnd_factor = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; /* rounding term; zero once u4_qp_div_6 >= 4 */ + int32x4_t rnd_fact = vdupq_n_s32(rnd_factor); + + UNUSED(ps_res_pred); + UNUSED(u1_res_accumulate); + + src_16x4x2 = vld4_s16(pi2_src); /* de-interleaving load: val[k] holds elements k, k+4, k+8 and k+12 */ + iscal_16x4x2 = vld4_s16((const int16_t *) pu2_iscal_mat); + weigh_16x4x2 = vld4_s16((const int16_t *) pu2_weigh_mat); + + weigh0_16x4 = vmul_s16(weigh_16x4x2.val[0], iscal_16x4x2.val[0]); + weigh1_16x4 = vmul_s16(weigh_16x4x2.val[1], iscal_16x4x2.val[1]); + weigh2_16x4 = vmul_s16(weigh_16x4x2.val[2], iscal_16x4x2.val[2]); + weigh3_16x4 = vmul_s16(weigh_16x4x2.val[3], iscal_16x4x2.val[3]); /* combined scale factors, kept in 16 bits */ + + q0_32x4 = vmull_s16(weigh0_16x4, src_16x4x2.val[0]); + q1_32x4 = vmull_s16(weigh1_16x4, src_16x4x2.val[1]); + q2_32x4 = vmull_s16(weigh2_16x4, src_16x4x2.val[2]); + q3_32x4 = vmull_s16(weigh3_16x4, src_16x4x2.val[3]); + + q0_32x4 = vaddq_s32(q0_32x4, rnd_fact); + q1_32x4 = vaddq_s32(q1_32x4, rnd_fact); + q2_32x4 = vaddq_s32(q2_32x4, rnd_fact); + q3_32x4 = vaddq_s32(q3_32x4, rnd_fact); + + q0_32x4 = vshlq_s32(q0_32x4, qp_div_6_32x4); + q1_32x4 =
vshlq_s32(q1_32x4, qp_div_6_32x4); + q2_32x4 = vshlq_s32(q2_32x4, qp_div_6_32x4); + q3_32x4 = vshlq_s32(q3_32x4, qp_div_6_32x4); /* left shift by u4_qp_div_6 */ + + q0_16x4 = vqshrn_n_s32(q0_32x4, 4); + q1_16x4 = vqshrn_n_s32(q1_32x4, 4); + q2_16x4 = vqshrn_n_s32(q2_32x4, 4); + q3_16x4 = vqshrn_n_s32(q3_32x4, 4); /* shift right by 4 and saturate to 16 bits */ + + if(i4_iq_start_idx == 1) + { + q0_16x4 = vset_lane_s16(pi2_dc_src[0], q0_16x4, 0); /* coefficient 0 comes from pi2_dc_src instead */ + } + + rq1_16x4 = vshr_n_s16(q1_16x4, 1); + rq3_16x4 = vshr_n_s16(q3_16x4, 1); + + x0_16x4 = vadd_s16(q0_16x4, q2_16x4); + x1_16x4 = vsub_s16(q0_16x4, q2_16x4); + x2_16x4 = vsub_s16(rq1_16x4, q3_16x4); + x3_16x4 = vadd_s16(q1_16x4, rq3_16x4); + + xx0_16x4 = vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + /* row 0 to row 3: 4x4 transpose built from vtrn_s16 and vzip_s32 */ + xx0_16x4x2 = vtrn_s16(xx0_16x4, xx1_16x4); + xx1_16x4x2 = vtrn_s16(xx2_16x4, xx3_16x4); + x0_32x2x2 = + vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + /* Store Horz transform output into temp */ + vst1_s16(pi2_tmp, x0_16x4); + vst1_s16(pi2_tmp + 4, x1_16x4); + vst1_s16(pi2_tmp + 8, x2_16x4); + vst1_s16(pi2_tmp + 12, x3_16x4); + + /* vertical inverse transform */ + rq1_16x4 = vshr_n_s16(x1_16x4, 1); + rq3_16x4 = vshr_n_s16(x3_16x4, 1); + + xx0_16x4 = vadd_s16(x0_16x4, x2_16x4); + xx1_16x4 = vsub_s16(x0_16x4, x2_16x4); + xx2_16x4 = vsub_s16(rq1_16x4, x3_16x4); + xx3_16x4 = vadd_s16(x1_16x4, rq3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx3_16x4); + x1_16x4 = vadd_s16(xx1_16x4, xx2_16x4); + x2_16x4 = vsub_s16(xx1_16x4, xx2_16x4); + x3_16x4 = vsub_s16(xx0_16x4, xx3_16x4); + + x0_16x4 = vrshr_n_s16(x0_16x4, 6); + x1_16x4
= vrshr_n_s16(x1_16x4, 6); + x2_16x4 = vrshr_n_s16(x2_16x4, 6); + x3_16x4 = vrshr_n_s16(x3_16x4, 6); /* rounding shift right by 6 */ + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + x0_16x4 = vmax_s16(x0_16x4, neg_255_16x4); + x1_16x4 = vmax_s16(x1_16x4, neg_255_16x4); + x2_16x4 = vmax_s16(x2_16x4, neg_255_16x4); + x3_16x4 = vmax_s16(x3_16x4, neg_255_16x4); + + /* Saturate all values > 255 to 255 and retain the rest as it is */ + x0_16x4 = vmin_s16(x0_16x4, pos_255_16x4); + x1_16x4 = vmin_s16(x1_16x4, pos_255_16x4); + x2_16x4 = vmin_s16(x2_16x4, pos_255_16x4); + x3_16x4 = vmin_s16(x3_16x4, pos_255_16x4); + + /* Write the clamped residual, one 4-value row per store */ + vst1_s16(pi2_res, x0_16x4); + vst1_s16(pi2_res + i4_res_stride, x1_16x4); + vst1_s16(pi2_res + (i4_res_stride << 1), x2_16x4); + vst1_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, x3_16x4); + + resd01_in = vcombine_s16(x0_16x4, x1_16x4); + resd23_in = vcombine_s16(x2_16x4, x3_16x4); + + /* Load pred (vld1_u8 fetches 8 bytes per row) */ + pred0_in = vld1_u8((uint8_t *) pu1_pred); + pred1_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride)); + pred2_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride << 1)); + pred3_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride * 3)); + + pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in)); + pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in)); + pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in)); + pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in)); + + pred01_in = vcombine_s16(vget_low_s16(pred0), vget_low_s16(pred1)); + pred23_in = vcombine_s16(vget_low_s16(pred2), vget_low_s16(pred3)); /* only the first 4 pixels of each loaded row are kept */ + + /* Out pixel = pred + res */ + pred01_in = vaddq_s16(pred01_in, resd01_in); + pred23_in = vaddq_s16(pred23_in, resd23_in); + + /* Convert to 8 bit unsigned with saturation */ + pred01_un = vqmovun_s16(pred01_in); + pred23_un = vqmovun_s16(pred23_in); + + vst1_lane_u32((uint32_t *) (pu1_out), vreinterpret_u32_u8(pred01_un), 0); + vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride), vreinterpret_u32_u8(pred01_un), 1); + vst1_lane_u32((uint32_t *) (pu1_out + (i4_out_stride
<< 1)), vreinterpret_u32_u8(pred23_un), 0); + vst1_lane_u32((uint32_t *) (pu1_out + ((i4_out_stride << 1) + i4_out_stride)), + vreinterpret_u32_u8(pred23_un), 1); +} + /** + * Inverse quantisation, 4x4 inverse transform and reconstruction that + * accumulates a residual prediction (NEON). + * + * De-quantisation and the two butterfly passes are followed by a rounding + * shift right by 6. The result is added to the 4x4 residual prediction read + * from ps_res_pred, the sum is clamped to [-255, 255], stored to ps_res, + * added to the prediction and stored to ps_rec as saturated 8-bit pixels. + * + * @param ps_src coefficients (16 consecutive WORD16 values) + * @param ps_pred prediction pixels and their stride + * @param ps_res_pred residual prediction (WORD16) and its stride + * @param ps_res receives the clamped accumulated residual + * @param ps_rec reconstructed output and its stride + * @param ps_iq_it_res_rec_constants source of pu2_iscal_mat, pu2_weigh_mat and u4_qp_div_6 + * @param pi2_tmp receives the 16 transposed first-pass values + * @param pi2_dc_src value written over de-quantised coefficient 0 when i4_iq_start_idx is 1 + * @param i4_iq_start_idx selects the pi2_dc_src substitution when equal to 1 + * @param u1_res_accumulate unused + */ +void isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + + int16x4x4_t src_16x4x2; + int16x4x4_t iscal_16x4x2; + int16x4x4_t weigh_16x4x2; + + int16x4_t q0_16x4, q1_16x4, q2_16x4, q3_16x4; + int32x4_t q0_32x4, q1_32x4, q2_32x4, q3_32x4; + int16x4_t rq1_16x4, rq3_16x4; + int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4; + int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4; + int16x4x2_t xx0_16x4x2, xx1_16x4x2; + int32x2x2_t x0_32x2x2, x1_32x2x2; + int16x4_t weigh0_16x4, weigh1_16x4, weigh2_16x4, weigh3_16x4; + + uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in; + int16x8_t pred0, pred1, pred2, pred3; + int16x4_t resd0_in, resd1_in, resd2_in, resd3_in; + int16x8_t resd01_in, resd23_in; + int16x8_t pred01_in, pred23_in; + uint8x8_t pred01_un, pred23_un; + + int32x4_t qp_div_6_32x4 = vdupq_n_s32(u4_qp_div_6); + + /* rounding term; zero once u4_qp_div_6 >= 4 */ WORD16 rnd_factor = (u4_qp_div_6 <
4) ? 1 << (3 - u4_qp_div_6) : 0; + int32x4_t rnd_fact = vdupq_n_s32(rnd_factor); + int16x4_t pos_255 = vdup_n_s16(((WORD16) UINT8_MAX)); + int16x4_t neg_255 = vdup_n_s16(-((WORD16) UINT8_MAX)); + + UNUSED(u1_res_accumulate); + + src_16x4x2 = vld4_s16(pi2_src); /* de-interleaving load: val[k] holds elements k, k+4, k+8 and k+12 */ + iscal_16x4x2 = vld4_s16((const int16_t *) pu2_iscal_mat); + weigh_16x4x2 = vld4_s16((const int16_t *) pu2_weigh_mat); + + weigh0_16x4 = vmul_s16(weigh_16x4x2.val[0], iscal_16x4x2.val[0]); + weigh1_16x4 = vmul_s16(weigh_16x4x2.val[1], iscal_16x4x2.val[1]); + weigh2_16x4 = vmul_s16(weigh_16x4x2.val[2], iscal_16x4x2.val[2]); + weigh3_16x4 = vmul_s16(weigh_16x4x2.val[3], iscal_16x4x2.val[3]); /* combined scale factors, kept in 16 bits */ + + q0_32x4 = vmull_s16(weigh0_16x4, src_16x4x2.val[0]); + q1_32x4 = vmull_s16(weigh1_16x4, src_16x4x2.val[1]); + q2_32x4 = vmull_s16(weigh2_16x4, src_16x4x2.val[2]); + q3_32x4 = vmull_s16(weigh3_16x4, src_16x4x2.val[3]); + + q0_32x4 = vaddq_s32(q0_32x4, rnd_fact); + q1_32x4 = vaddq_s32(q1_32x4, rnd_fact); + q2_32x4 = vaddq_s32(q2_32x4, rnd_fact); + q3_32x4 = vaddq_s32(q3_32x4, rnd_fact); + + q0_32x4 = vshlq_s32(q0_32x4, qp_div_6_32x4); + q1_32x4 = vshlq_s32(q1_32x4, qp_div_6_32x4); + q2_32x4 = vshlq_s32(q2_32x4, qp_div_6_32x4); + q3_32x4 = vshlq_s32(q3_32x4, qp_div_6_32x4); /* left shift by u4_qp_div_6 */ + + q0_16x4 = vqshrn_n_s32(q0_32x4, 4); + q1_16x4 = vqshrn_n_s32(q1_32x4, 4); + q2_16x4 = vqshrn_n_s32(q2_32x4, 4); + q3_16x4 = vqshrn_n_s32(q3_32x4, 4); /* shift right by 4 and saturate to 16 bits */ + + if(i4_iq_start_idx == 1) + { + q0_16x4 = vset_lane_s16(pi2_dc_src[0], q0_16x4, 0); /* coefficient 0 comes from pi2_dc_src instead */ + } + + rq1_16x4 = vshr_n_s16(q1_16x4, 1); + rq3_16x4 = vshr_n_s16(q3_16x4, 1); + + x0_16x4 = vadd_s16(q0_16x4, q2_16x4); + x1_16x4 = vsub_s16(q0_16x4, q2_16x4); + x2_16x4 = vsub_s16(rq1_16x4, q3_16x4); + x3_16x4 = vadd_s16(q1_16x4, rq3_16x4); + + xx0_16x4 = vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + /* row 0 to row 3: 4x4 transpose built from vtrn_s16 and vzip_s32 */ + xx0_16x4x2 = vtrn_s16(xx0_16x4, xx1_16x4); + xx1_16x4x2 = vtrn_s16(xx2_16x4,
xx3_16x4); + x0_32x2x2 = + vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + /* Store Horz transform output into temp */ + vst1_s16(pi2_tmp, x0_16x4); + vst1_s16(pi2_tmp + 4, x1_16x4); + vst1_s16(pi2_tmp + 8, x2_16x4); + vst1_s16(pi2_tmp + 12, x3_16x4); + + /* vertical inverse transform */ + rq1_16x4 = vshr_n_s16(x1_16x4, 1); + rq3_16x4 = vshr_n_s16(x3_16x4, 1); + + xx0_16x4 = vadd_s16(x0_16x4, x2_16x4); + xx1_16x4 = vsub_s16(x0_16x4, x2_16x4); + xx2_16x4 = vsub_s16(rq1_16x4, x3_16x4); + xx3_16x4 = vadd_s16(x1_16x4, rq3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx3_16x4); + x1_16x4 = vadd_s16(xx1_16x4, xx2_16x4); + x2_16x4 = vsub_s16(xx1_16x4, xx2_16x4); + x3_16x4 = vsub_s16(xx0_16x4, xx3_16x4); + + x0_16x4 = vrshr_n_s16(x0_16x4, 6); + x1_16x4 = vrshr_n_s16(x1_16x4, 6); + x2_16x4 = vrshr_n_s16(x2_16x4, 6); + x3_16x4 = vrshr_n_s16(x3_16x4, 6); /* rounding shift right by 6 */ + + /* Accumulating Res */ + + /* Load Res pred: 4 WORD16 values per row, rows i4_res_pred_stride elements apart */ + resd0_in = vld1_s16((int16_t *) pi2_res_pred); + resd1_in = vld1_s16((int16_t *) pi2_res_pred + i4_res_pred_stride); + resd2_in = vld1_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 2)); + resd3_in = vld1_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 3)); + + /* Add res pred with res obtained */ + resd0_in = vadd_s16(resd0_in, x0_16x4); + resd1_in = vadd_s16(resd1_in, x1_16x4); + resd2_in = vadd_s16(resd2_in, x2_16x4); + resd3_in = vadd_s16(resd3_in, x3_16x4); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + resd0_in = vmax_s16(resd0_in, neg_255); + resd1_in = vmax_s16(resd1_in, neg_255); + resd2_in = vmax_s16(resd2_in, neg_255); + resd3_in = vmax_s16(resd3_in, neg_255); + + /*
Saturate all values > 255 to 255 and retain the rest as it is */
+    resd0_in = vmin_s16(resd0_in, pos_255);
+    resd1_in = vmin_s16(resd1_in, pos_255);
+    resd2_in = vmin_s16(resd2_in, pos_255);
+    resd3_in = vmin_s16(resd3_in, pos_255);
+
+    vst1_s16(pi2_res, resd0_in);
+    vst1_s16(pi2_res + i4_res_stride, resd1_in);
+    vst1_s16(pi2_res + (i4_res_stride << 1), resd2_in);
+    vst1_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, resd3_in);
+
+    resd01_in = vcombine_s16(resd0_in, resd1_in);
+    resd23_in = vcombine_s16(resd2_in, resd3_in);
+
+    /* Load pred */
+    pred0_in = vld1_u8((uint8_t *) pu1_pred);
+    pred1_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride));
+    pred2_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride << 1));
+    pred3_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride * 3));
+
+    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
+    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
+    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
+    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
+
+    pred01_in = vcombine_s16(vget_low_s16(pred0), vget_low_s16(pred1));
+    pred23_in = vcombine_s16(vget_low_s16(pred2), vget_low_s16(pred3));
+
+    /* Out pixel = pred + res */
+    pred01_in = vaddq_s16(pred01_in, resd01_in);
+    pred23_in = vaddq_s16(pred23_in, resd23_in);
+
+    /* Convert to 8 bit unsigned with saturation */
+    pred01_un = vqmovun_s16(pred01_in);
+    pred23_un = vqmovun_s16(pred23_in);
+
+    vst1_lane_u32((uint32_t *) (pu1_out), vreinterpret_u32_u8(pred01_un), 0);
+    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride), vreinterpret_u32_u8(pred01_un), 1);
+    vst1_lane_u32((uint32_t *) (pu1_out + (i4_out_stride << 1)), vreinterpret_u32_u8(pred23_un), 0);
+    vst1_lane_u32((uint32_t *) (pu1_out + ((i4_out_stride << 1) + i4_out_stride)),
+                  vreinterpret_u32_u8(pred23_un), 1);
+}
+/*
+ * Inverse-quantises a 4x4 block of chroma coefficients, runs the two-pass 4x4 inverse
+ * transform and adds the clipped residual to the prediction to form the reconstruction.
+ *
+ * ps_src      : 16 WORD16 coefficients are read from pv_data
+ * ps_pred     : prediction bytes; four rows of 8 bytes are loaded, i4_data_stride apart
+ * ps_rec      : reconstruction; four rows of 8 bytes are read, blended through
+ *               chroma_mask_8x8 and written back, so only the masked bytes change
+ * ps_iq_it_res_rec_constants : supplies pu2_iscal_mat, pu2_weigh_mat and u4_qp_div_6
+ * pi2_tmp     : receives the 16 values produced by the first transform pass
+ * pi2_dc_src  : pi2_dc_src[0] replaces the first dequantised coefficient
+ * ps_res_pred, ps_res, i4_iq_start_idx, u1_res_accumulate : unused in this variant
+ */
+void isvc_iquant_itrans_recon_chroma_4x4_neon(
+    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
+    buffer_container_t *ps_res, buffer_container_t *ps_rec,
+    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
+    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
+    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+    /* Rounding term added before the dequantisation shifts; zero once u4_qp_div_6 is 4 or more */
+    WORD16 i2_rnd_factor = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+
+    int16x4x4_t src_16x4x2;
+    int16x4x4_t iscal_16x4x2;
+    int16x4x4_t weigh_16x4x2;
+
+    int16x4_t q0_16x4, q1_16x4, q2_16x4, q3_16x4;
+    int32x4_t q0_32x4, q1_32x4, q2_32x4, q3_32x4;
+    int16x4_t rq1_16x4, rq3_16x4;
+    int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4;
+    int16x8_t x0_16x8, x1_16x8, x2_16x8, x3_16x8;
+    int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4;
+    int16x4x2_t xx0_16x4x2, xx1_16x4x2;
+    int32x2x2_t x0_32x2x2, x1_32x2x2;
+    int16x4_t weigh0_16x4, weigh1_16x4, weigh2_16x4, weigh3_16x4;
+
+    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
+    int16x8_t pred0, pred1, pred2, pred3;
+    int16x8_t rec0, rec1, rec2, rec3;
+    uint8x8_t rec0_un, rec1_un, rec2_un, rec3_un;
+    uint8x8_t out0, out1, out2, out3;
+    /* 0x00ff in every 16-bit lane: the byte blend keeps one byte out of each pair */
+    uint8x8_t chroma_mask_8x8 = vreinterpret_u8_u16(vdup_n_u16(0x00ff));
+
+    int16x4_t pos_255_16x4 = vdup_n_s16(((WORD16) UINT8_MAX));
+    int16x4_t neg_255_16x4 = vdup_n_s16(-((WORD16) UINT8_MAX));
+    int32x4_t qp_div_6_32x4 = vdupq_n_s32(u4_qp_div_6);
+    int32x4_t rnd_fact = vdupq_n_s32(i2_rnd_factor);
+
+    UNUSED(i4_iq_start_idx);
+    UNUSED(ps_res);
+    UNUSED(ps_res_pred);
+    UNUSED(u1_res_accumulate);
+    /* vld4 de-interleaves with stride 4: val[k] holds elements k, k + 4, k + 8 and k + 12 */
+    src_16x4x2 = vld4_s16(pi2_src);
+    iscal_16x4x2 = vld4_s16((const int16_t *) pu2_iscal_mat);
+    weigh_16x4x2 = vld4_s16((const int16_t *) pu2_weigh_mat);
+    /* Per-coefficient scale = weigh * iscal, kept as a 16-bit product */
+    weigh0_16x4 = vmul_s16(weigh_16x4x2.val[0], iscal_16x4x2.val[0]);
+    weigh1_16x4 = vmul_s16(weigh_16x4x2.val[1], iscal_16x4x2.val[1]);
+    weigh2_16x4 = vmul_s16(weigh_16x4x2.val[2], iscal_16x4x2.val[2]);
+    weigh3_16x4 = vmul_s16(weigh_16x4x2.val[3], iscal_16x4x2.val[3]);
+    /* Dequantise: ((coef * scale) + rnd) << qp_div_6, then saturating narrow with >> 4 */
+    q0_32x4 = vmull_s16(weigh0_16x4, src_16x4x2.val[0]);
+    q1_32x4 = vmull_s16(weigh1_16x4, src_16x4x2.val[1]);
+    q2_32x4 = vmull_s16(weigh2_16x4, src_16x4x2.val[2]);
+    q3_32x4 = vmull_s16(weigh3_16x4, src_16x4x2.val[3]);
+
+    q0_32x4 = vaddq_s32(q0_32x4, rnd_fact);
+    q1_32x4 = vaddq_s32(q1_32x4, rnd_fact);
+    q2_32x4 = vaddq_s32(q2_32x4, rnd_fact);
+    q3_32x4 = vaddq_s32(q3_32x4, rnd_fact);
+
+    q0_32x4 = vshlq_s32(q0_32x4, qp_div_6_32x4);
+    q1_32x4 = vshlq_s32(q1_32x4, qp_div_6_32x4);
+    q2_32x4 = vshlq_s32(q2_32x4, qp_div_6_32x4);
+    q3_32x4 = vshlq_s32(q3_32x4, qp_div_6_32x4);
+
+    q0_16x4 = vqshrn_n_s32(q0_32x4, 4);
+    q1_16x4 = vqshrn_n_s32(q1_32x4, 4);
+    q2_16x4 = vqshrn_n_s32(q2_32x4, 4);
+    q3_16x4 = vqshrn_n_s32(q3_32x4, 4);
+    /* Element 0 is not taken from the dequantised data: it is overwritten with pi2_dc_src[0] */
+    q0_16x4 = vset_lane_s16(pi2_dc_src[0], q0_16x4, 0);
+
+    rq1_16x4 = vshr_n_s16(q1_16x4, 1);
+    rq3_16x4 = vshr_n_s16(q3_16x4, 1);
+
+    x0_16x4 = vadd_s16(q0_16x4, q2_16x4);
+    x1_16x4 = vsub_s16(q0_16x4, q2_16x4);
+    x2_16x4 = vsub_s16(rq1_16x4, q3_16x4);
+    x3_16x4 = vadd_s16(q1_16x4, rq3_16x4);
+
+    xx0_16x4 = vadd_s16(x0_16x4, x3_16x4);
+    xx1_16x4 = vadd_s16(x1_16x4, x2_16x4);
+    xx2_16x4 = vsub_s16(x1_16x4, x2_16x4);
+    xx3_16x4 = vsub_s16(x0_16x4, x3_16x4);
+
+    /* row 0 to row 3: 4x4 transpose built from vtrn (16-bit) followed by vzip (32-bit) */
+    xx0_16x4x2 = vtrn_s16(xx0_16x4, xx1_16x4);
+    xx1_16x4x2 = vtrn_s16(xx2_16x4, xx3_16x4);
+    x0_32x2x2 =
+        vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0]));
+    x1_32x2x2 =
+        vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1]));
+
+    x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]);
+    x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]);
+    x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]);
+    x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]);
+
+    /* Store Horz transform output into temp */
+    vst1_s16(pi2_tmp, x0_16x4);
+    vst1_s16(pi2_tmp + 4, x1_16x4);
+    vst1_s16(pi2_tmp + 8, x2_16x4);
+    vst1_s16(pi2_tmp + 12, x3_16x4);
+
+    /* vertical inverse transform */
+    rq1_16x4 = vshr_n_s16(x1_16x4, 1);
+    rq3_16x4 = vshr_n_s16(x3_16x4, 1);
+
+    xx0_16x4 = vadd_s16(x0_16x4, x2_16x4);
+    xx1_16x4 = vsub_s16(x0_16x4, x2_16x4);
+    xx2_16x4 = vsub_s16(rq1_16x4, x3_16x4);
+    xx3_16x4 = vadd_s16(x1_16x4, rq3_16x4);
+
+    x0_16x4 = vadd_s16(xx0_16x4, xx3_16x4);
+    x1_16x4 = vadd_s16(xx1_16x4, xx2_16x4);
+    x2_16x4 = vsub_s16(xx1_16x4, xx2_16x4);
+    x3_16x4 = vsub_s16(xx0_16x4, xx3_16x4);
+    /* Rounding right shift by 6 ends the transform */
+    x0_16x4 = vrshr_n_s16(x0_16x4, 6);
+    x1_16x4 = vrshr_n_s16(x1_16x4, 6);
+    x2_16x4 = vrshr_n_s16(x2_16x4, 6);
+    x3_16x4 = vrshr_n_s16(x3_16x4, 6);
+
+    /* Saturate all values < -255 to -255 and retain the rest as it is */
+    x0_16x4 = vmax_s16(x0_16x4, neg_255_16x4);
+    x1_16x4 = vmax_s16(x1_16x4, neg_255_16x4);
+    x2_16x4 = vmax_s16(x2_16x4, neg_255_16x4);
+    x3_16x4 = vmax_s16(x3_16x4, neg_255_16x4);
+
+    /* Saturate all values > 255 to 255 and retain the rest as it is */
+    x0_16x4 = vmin_s16(x0_16x4, pos_255_16x4);
+    x1_16x4 = vmin_s16(x1_16x4, pos_255_16x4);
+    x2_16x4 = vmin_s16(x2_16x4, pos_255_16x4);
+    x3_16x4 = vmin_s16(x3_16x4, pos_255_16x4);
+    /* Sign-extend to 32 bits and view as 8 x 16 bits: residuals then sit in alternate lanes */
+    x0_16x8 = vreinterpretq_s16_s32(vmovl_s16(x0_16x4));
+    x1_16x8 = vreinterpretq_s16_s32(vmovl_s16(x1_16x4));
+    x2_16x8 = vreinterpretq_s16_s32(vmovl_s16(x2_16x4));
+    x3_16x8 = vreinterpretq_s16_s32(vmovl_s16(x3_16x4));
+
+    pred0_in = vld1_u8((uint8_t *) pu1_pred);
+    pred1_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride));
+    pred2_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride << 1));
+    pred3_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride * 3));
+
+    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
+    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
+    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
+    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
+
+    /* Out pixel = pred + res */
+
rec0 = vaddq_s16(pred0, x0_16x8);
+    rec1 = vaddq_s16(pred1, x1_16x8);
+    rec2 = vaddq_s16(pred2, x2_16x8);
+    rec3 = vaddq_s16(pred3, x3_16x8);
+    /* Current output rows are read first so that the unmasked bytes can be written back as is */
+    out0 = vld1_u8(pu1_out);
+    out1 = vld1_u8(pu1_out + i4_out_stride);
+    out2 = vld1_u8(pu1_out + i4_out_stride * 2);
+    out3 = vld1_u8(pu1_out + i4_out_stride * 3);
+
+    /* Convert to 8 bit unsigned with saturation */
+    rec0_un = vqmovun_s16(rec0);
+    rec1_un = vqmovun_s16(rec1);
+    rec2_un = vqmovun_s16(rec2);
+    rec3_un = vqmovun_s16(rec3);
+
+    /* Store in alternate positions */
+    out0 = vbsl_u8(chroma_mask_8x8, rec0_un, out0);
+    out1 = vbsl_u8(chroma_mask_8x8, rec1_un, out1);
+    out2 = vbsl_u8(chroma_mask_8x8, rec2_un, out2);
+    out3 = vbsl_u8(chroma_mask_8x8, rec3_un, out3);
+
+    vst1_u8((pu1_out), out0);
+    vst1_u8((pu1_out + i4_out_stride), out1);
+    vst1_u8((pu1_out + (i4_out_stride << 1)), out2);
+    vst1_u8((pu1_out + ((i4_out_stride << 1) + i4_out_stride)), out3);
+}
+/*
+ * Chroma 4x4 inverse quantisation, two-pass inverse transform and reconstruction that also
+ * writes the residual out.
+ *
+ * ps_src      : 16 WORD16 coefficients are read from pv_data
+ * ps_pred     : prediction bytes; four rows of 8 bytes are loaded, i4_data_stride apart
+ * ps_res      : residual buffer; four rows of 8 WORD16 are read, lanes 0, 2, 4 and 6 are
+ *               replaced by the residual clipped to [-255, 255], and the rows are written back
+ * ps_rec      : reconstruction; four rows of 8 bytes are read, blended through
+ *               chroma_mask_8x8 and written back, so only the masked bytes change
+ * ps_iq_it_res_rec_constants : supplies pu2_iscal_mat, pu2_weigh_mat and u4_qp_div_6
+ * pi2_tmp     : receives the 16 values produced by the first transform pass
+ * pi2_dc_src  : pi2_dc_src[0] replaces the first dequantised coefficient
+ * ps_res_pred, i4_iq_start_idx, u1_res_accumulate : unused in this variant
+ */
+void isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon(
+    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
+    buffer_container_t *ps_res, buffer_container_t *ps_rec,
+    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
+    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
+    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
+    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
+    WORD32 i4_res_stride = ps_res->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+
+    WORD16 i2_rnd_factor = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+
+    int16x4x4_t src_16x4x2;
+    int16x4x4_t iscal_16x4x2;
+    int16x4x4_t weigh_16x4x2;
+
+    int16x4_t q0_16x4, q1_16x4, q2_16x4, q3_16x4;
+    int32x4_t q0_32x4, q1_32x4, q2_32x4, q3_32x4;
+    int16x4_t rq1_16x4, rq3_16x4;
+    int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4;
+    int16x8_t x0_16x8, x1_16x8, x2_16x8, x3_16x8;
+    int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4;
+    int16x4x2_t xx0_16x4x2, xx1_16x4x2;
+    int32x2x2_t x0_32x2x2, x1_32x2x2;
+    int16x4_t weigh0_16x4, weigh1_16x4, weigh2_16x4, weigh3_16x4;
+
+    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
+    int16x8_t pred0, pred1, pred2, pred3;
+    int16x8_t rec0, rec1, rec2, rec3;
+    uint8x8_t rec0_un, rec1_un, rec2_un, rec3_un;
+    uint8x8_t out0, out1, out2, out3;
+    int16x8_t resout0, resout1, resout2, resout3;
+    /* Byte mask for the output rows and 16-bit lane mask (lanes 0, 2, 4, 6) for the residual rows */
+    uint8x8_t chroma_mask_8x8 = vreinterpret_u8_u16(vdup_n_u16(0x00ff));
+    uint16x8_t chroma_mask_16x8 = {0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000};
+    int32x4_t qp_div_6_32x4 = vdupq_n_s32(u4_qp_div_6);
+    int32x4_t rnd_fact = vdupq_n_s32(i2_rnd_factor);
+    int16x4_t pos_255_16x4 = vdup_n_s16(((WORD16) UINT8_MAX));
+    int16x4_t neg_255_16x4 = vdup_n_s16(-((WORD16) UINT8_MAX));
+
+    UNUSED(i4_iq_start_idx);
+    UNUSED(ps_res_pred);
+    UNUSED(u1_res_accumulate);
+    /* vld4 de-interleaves with stride 4: val[k] holds elements k, k + 4, k + 8 and k + 12 */
+    src_16x4x2 = vld4_s16(pi2_src);
+    iscal_16x4x2 = vld4_s16((const int16_t *) pu2_iscal_mat);
+    weigh_16x4x2 = vld4_s16((const int16_t *) pu2_weigh_mat);
+    /* Per-coefficient scale = weigh * iscal, kept as a 16-bit product */
+    weigh0_16x4 = vmul_s16(weigh_16x4x2.val[0], iscal_16x4x2.val[0]);
+    weigh1_16x4 = vmul_s16(weigh_16x4x2.val[1], iscal_16x4x2.val[1]);
+    weigh2_16x4 = vmul_s16(weigh_16x4x2.val[2], iscal_16x4x2.val[2]);
+    weigh3_16x4 = vmul_s16(weigh_16x4x2.val[3], iscal_16x4x2.val[3]);
+    /* Dequantise: ((coef * scale) + rnd) << qp_div_6, then saturating narrow with >> 4 */
+    q0_32x4 = vmull_s16(weigh0_16x4, src_16x4x2.val[0]);
+    q1_32x4 = vmull_s16(weigh1_16x4, src_16x4x2.val[1]);
+    q2_32x4 = vmull_s16(weigh2_16x4, src_16x4x2.val[2]);
+    q3_32x4 = vmull_s16(weigh3_16x4, src_16x4x2.val[3]);
+
+    q0_32x4 = vaddq_s32(q0_32x4, rnd_fact);
+    q1_32x4 = vaddq_s32(q1_32x4, rnd_fact);
+    q2_32x4 = vaddq_s32(q2_32x4, rnd_fact);
+    q3_32x4 = vaddq_s32(q3_32x4, rnd_fact);
+
+    q0_32x4 = vshlq_s32(q0_32x4, qp_div_6_32x4);
+    q1_32x4 = vshlq_s32(q1_32x4, qp_div_6_32x4);
+    q2_32x4 = vshlq_s32(q2_32x4, qp_div_6_32x4);
+    q3_32x4 = vshlq_s32(q3_32x4, qp_div_6_32x4);
+
+    q0_16x4 = vqshrn_n_s32(q0_32x4, 4);
+    q1_16x4 = vqshrn_n_s32(q1_32x4, 4);
+    q2_16x4 = vqshrn_n_s32(q2_32x4, 4);
+    q3_16x4 = vqshrn_n_s32(q3_32x4, 4);
+    /* Element 0 is not taken from the dequantised data: it is overwritten with pi2_dc_src[0] */
+    q0_16x4 = vset_lane_s16(pi2_dc_src[0], q0_16x4, 0);
+
+    rq1_16x4 = vshr_n_s16(q1_16x4, 1);
+    rq3_16x4 = vshr_n_s16(q3_16x4, 1);
+
+    x0_16x4 = vadd_s16(q0_16x4, q2_16x4);
+    x1_16x4 = vsub_s16(q0_16x4, q2_16x4);
+    x2_16x4 = vsub_s16(rq1_16x4, q3_16x4);
+    x3_16x4 = vadd_s16(q1_16x4, rq3_16x4);
+
+    xx0_16x4 = vadd_s16(x0_16x4, x3_16x4);
+    xx1_16x4 = vadd_s16(x1_16x4, x2_16x4);
+    xx2_16x4 = vsub_s16(x1_16x4, x2_16x4);
+    xx3_16x4 = vsub_s16(x0_16x4, x3_16x4);
+
+    /* row 0 to row 3: 4x4 transpose built from vtrn (16-bit) followed by vzip (32-bit) */
+    xx0_16x4x2 = vtrn_s16(xx0_16x4, xx1_16x4);
+    xx1_16x4x2 = vtrn_s16(xx2_16x4, xx3_16x4);
+    x0_32x2x2 =
+        vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0]));
+    x1_32x2x2 =
+        vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1]));
+
+    x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]);
+    x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]);
+    x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]);
+    x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]);
+
+    /* Store Horz transform output into temp */
+    vst1_s16(pi2_tmp, x0_16x4);
+    vst1_s16(pi2_tmp + 4, x1_16x4);
+    vst1_s16(pi2_tmp + 8, x2_16x4);
+    vst1_s16(pi2_tmp + 12, x3_16x4);
+
+    /* vertical inverse transform */
+    rq1_16x4 = vshr_n_s16(x1_16x4, 1);
+    rq3_16x4 = vshr_n_s16(x3_16x4, 1);
+
+    xx0_16x4 = vadd_s16(x0_16x4, x2_16x4);
+    xx1_16x4 = vsub_s16(x0_16x4, x2_16x4);
+    xx2_16x4 = vsub_s16(rq1_16x4, x3_16x4);
+    xx3_16x4 = vadd_s16(x1_16x4, rq3_16x4);
+
+    x0_16x4 = vadd_s16(xx0_16x4, xx3_16x4);
+    x1_16x4 = vadd_s16(xx1_16x4, xx2_16x4);
+    x2_16x4 = vsub_s16(xx1_16x4, xx2_16x4);
+    x3_16x4 = vsub_s16(xx0_16x4, xx3_16x4);
+    /* Rounding right shift by 6 ends the transform */
+    x0_16x4 = vrshr_n_s16(x0_16x4, 6);
+    x1_16x4 = vrshr_n_s16(x1_16x4, 6);
+    x2_16x4 = vrshr_n_s16(x2_16x4, 6);
+    x3_16x4 = vrshr_n_s16(x3_16x4, 6);
+
+    /* Saturate all values < -255 to -255 and retain the rest as it is */
+    x0_16x4 = vmax_s16(x0_16x4, neg_255_16x4);
+    x1_16x4 = vmax_s16(x1_16x4, neg_255_16x4);
+    x2_16x4 = vmax_s16(x2_16x4, neg_255_16x4);
+    x3_16x4 = vmax_s16(x3_16x4, neg_255_16x4);
+
+    /* Saturate all values > 255 to 255 and retain the rest as it is */
+    x0_16x4 = vmin_s16(x0_16x4, pos_255_16x4);
+    x1_16x4 = vmin_s16(x1_16x4, pos_255_16x4);
+    x2_16x4 = vmin_s16(x2_16x4, pos_255_16x4);
+    x3_16x4 = vmin_s16(x3_16x4, pos_255_16x4);
+    /* Current residual rows are read first so that the unmasked lanes are written back as is */
+    resout0 = vld1q_s16(pi2_res);
+    resout1 = vld1q_s16(pi2_res + i4_res_stride);
+    resout2 = vld1q_s16(pi2_res + i4_res_stride * 2);
+    resout3 = vld1q_s16(pi2_res + i4_res_stride * 3);
+
+    x0_16x8 = vreinterpretq_s16_s32(vmovl_s16(x0_16x4));
+    x1_16x8 = vreinterpretq_s16_s32(vmovl_s16(x1_16x4));
+    x2_16x8 = vreinterpretq_s16_s32(vmovl_s16(x2_16x4));
+    x3_16x8 = vreinterpretq_s16_s32(vmovl_s16(x3_16x4));
+
+    /* Storing res in alternate positions */
+    resout0 = vbslq_s16(chroma_mask_16x8, x0_16x8, resout0);
+    resout1 = vbslq_s16(chroma_mask_16x8, x1_16x8, resout1);
+    resout2 = vbslq_s16(chroma_mask_16x8, x2_16x8, resout2);
+    resout3 = vbslq_s16(chroma_mask_16x8, x3_16x8, resout3);
+
+    vst1q_s16(pi2_res, resout0);
+    vst1q_s16(pi2_res + i4_res_stride, resout1);
+    vst1q_s16(pi2_res + (i4_res_stride << 1), resout2);
+    vst1q_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, resout3);
+
+    pred0_in = vld1_u8((uint8_t *) pu1_pred);
+    pred1_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride));
+    pred2_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride << 1));
+    pred3_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride * 3));
+
+    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
+    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
+    pred2 =
vreinterpretq_s16_u16(vmovl_u8(pred2_in));
+    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
+
+    /* Out pixel = pred + res */
+    rec0 = vaddq_s16(pred0, x0_16x8);
+    rec1 = vaddq_s16(pred1, x1_16x8);
+    rec2 = vaddq_s16(pred2, x2_16x8);
+    rec3 = vaddq_s16(pred3, x3_16x8);
+    /* Current output rows are read first so that the unmasked bytes can be written back as is */
+    out0 = vld1_u8(pu1_out);
+    out1 = vld1_u8(pu1_out + i4_out_stride);
+    out2 = vld1_u8(pu1_out + i4_out_stride * 2);
+    out3 = vld1_u8(pu1_out + i4_out_stride * 3);
+
+    /* Convert to 8 bit unsigned with saturation */
+    rec0_un = vqmovun_s16(rec0);
+    rec1_un = vqmovun_s16(rec1);
+    rec2_un = vqmovun_s16(rec2);
+    rec3_un = vqmovun_s16(rec3);
+
+    /* Store output pixels in alternate positions */
+    out0 = vbsl_u8(chroma_mask_8x8, rec0_un, out0);
+    out1 = vbsl_u8(chroma_mask_8x8, rec1_un, out1);
+    out2 = vbsl_u8(chroma_mask_8x8, rec2_un, out2);
+    out3 = vbsl_u8(chroma_mask_8x8, rec3_un, out3);
+
+    vst1_u8((pu1_out), out0);
+    vst1_u8((pu1_out + i4_out_stride), out1);
+    vst1_u8((pu1_out + (i4_out_stride << 1)), out2);
+    vst1_u8((pu1_out + ((i4_out_stride << 1) + i4_out_stride)), out3);
+}
+/*
+ * Chroma 4x4 inverse quantisation and two-pass inverse transform whose output is added to a
+ * residual prediction before reconstruction.
+ *
+ * ps_src      : 16 WORD16 coefficients are read from pv_data
+ * ps_pred     : prediction bytes; four rows of 8 bytes are loaded, i4_data_stride apart
+ * ps_res_pred : residual prediction; four rows of 8 WORD16 are loaded and lanes 0, 2, 4 and 6
+ *               are added to the transform output
+ * ps_res      : residual buffer; four rows of 8 WORD16 are read, lanes 0, 2, 4 and 6 are
+ *               replaced by the sum clipped to [-255, 255], and the rows are written back
+ * ps_rec      : reconstruction; four rows of 8 bytes are read, blended through
+ *               chroma_mask_8x8 and written back, so only the masked bytes change
+ * ps_iq_it_res_rec_constants : supplies pu2_iscal_mat, pu2_weigh_mat and u4_qp_div_6
+ * pi2_tmp     : receives the 16 values produced by the first transform pass
+ * pi2_dc_src  : pi2_dc_src[0] replaces the first dequantised coefficient
+ * i4_iq_start_idx, u1_res_accumulate : unused in this variant
+ */
+void isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon(
+    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
+    buffer_container_t *ps_res, buffer_container_t *ps_rec,
+    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
+    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
+    WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data;
+    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
+    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
+    WORD32 i4_res_stride = ps_res->i4_data_stride;
+    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+
+    WORD16 i2_rnd_factor = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+
+    int16x4x4_t src_16x4x2;
+    int16x4x4_t iscal_16x4x2;
+    int16x4x4_t weigh_16x4x2;
+
+    int16x4_t q0_16x4, q1_16x4, q2_16x4, q3_16x4;
+    int32x4_t q0_32x4, q1_32x4, q2_32x4, q3_32x4;
+    int16x4_t rq1_16x4, rq3_16x4;
+    int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4;
+    int16x8_t x0_16x8, x1_16x8, x2_16x8, x3_16x8;
+    int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4;
+    int16x4x2_t xx0_16x4x2, xx1_16x4x2;
+    int32x2x2_t x0_32x2x2, x1_32x2x2;
+    int16x4_t weigh0_16x4, weigh1_16x4, weigh2_16x4, weigh3_16x4;
+
+    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
+    int16x8_t pred0, pred1, pred2, pred3;
+    int16x8_t rec0, rec1, rec2, rec3;
+    uint8x8_t rec0_un, rec1_un, rec2_un, rec3_un;
+    int16x8_t resd0_in, resd1_in, resd2_in, resd3_in;
+    int16x8_t resd1_in_mask, resd2_in_mask, resd3_in_mask;
+    uint8x8_t out0, out1, out2, out3;
+    int16x8_t resout0, resout1, resout2, resout3;
+    int16x8_t pos_255 = vdupq_n_s16(((WORD16) UINT8_MAX));
+    int16x8_t neg_255 = vdupq_n_s16(-((WORD16) UINT8_MAX));
+    /* Byte mask for the output rows and 16-bit lane mask (lanes 0, 2, 4, 6) for the residual rows */
+    uint8x8_t chroma_mask_8x8 = vreinterpret_u8_u16(vdup_n_u16(0x00ff));
+    uint16x8_t chroma_mask_16x8 = {0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000};
+
+    int32x4_t qp_div_6_32x4 = vdupq_n_s32(u4_qp_div_6);
+    int32x4_t rnd_fact = vdupq_n_s32(i2_rnd_factor);
+    /* All-zero rows: they provide the zeros for the lanes that the mask does not select */
+    int16x8_t resd0_in_mask = {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000};
+
+    UNUSED(i4_iq_start_idx);
+    UNUSED(u1_res_accumulate);
+
+    resd1_in_mask = resd0_in_mask;
+    resd2_in_mask = resd0_in_mask;
+    resd3_in_mask = resd0_in_mask;
+    /* vld4 de-interleaves with stride 4: val[k] holds elements k, k + 4, k + 8 and k + 12 */
+    src_16x4x2 = vld4_s16(pi2_src);
+    iscal_16x4x2 = vld4_s16((const int16_t *) pu2_iscal_mat);
+    weigh_16x4x2 = vld4_s16((const int16_t *) pu2_weigh_mat);
+    /* Per-coefficient scale = weigh * iscal, kept as a 16-bit product */
+    weigh0_16x4 = vmul_s16(weigh_16x4x2.val[0], iscal_16x4x2.val[0]);
+    weigh1_16x4 = vmul_s16(weigh_16x4x2.val[1], iscal_16x4x2.val[1]);
+    weigh2_16x4 = vmul_s16(weigh_16x4x2.val[2], iscal_16x4x2.val[2]);
+    weigh3_16x4 = vmul_s16(weigh_16x4x2.val[3], iscal_16x4x2.val[3]);
+    /* Dequantise: ((coef * scale) + rnd) << qp_div_6, then saturating narrow with >> 4 */
+    q0_32x4 = vmull_s16(weigh0_16x4, src_16x4x2.val[0]);
+    q1_32x4 = vmull_s16(weigh1_16x4, src_16x4x2.val[1]);
+    q2_32x4 = vmull_s16(weigh2_16x4, src_16x4x2.val[2]);
+    q3_32x4 = vmull_s16(weigh3_16x4, src_16x4x2.val[3]);
+
+    q0_32x4 = vaddq_s32(q0_32x4, rnd_fact);
+    q1_32x4 = vaddq_s32(q1_32x4, rnd_fact);
+    q2_32x4 = vaddq_s32(q2_32x4, rnd_fact);
+    q3_32x4 = vaddq_s32(q3_32x4, rnd_fact);
+
+    q0_32x4 = vshlq_s32(q0_32x4, qp_div_6_32x4);
+    q1_32x4 = vshlq_s32(q1_32x4, qp_div_6_32x4);
+    q2_32x4 = vshlq_s32(q2_32x4, qp_div_6_32x4);
+    q3_32x4 = vshlq_s32(q3_32x4, qp_div_6_32x4);
+
+    q0_16x4 = vqshrn_n_s32(q0_32x4, 4);
+    q1_16x4 = vqshrn_n_s32(q1_32x4, 4);
+    q2_16x4 = vqshrn_n_s32(q2_32x4, 4);
+    q3_16x4 = vqshrn_n_s32(q3_32x4, 4);
+    /* Element 0 is not taken from the dequantised data: it is overwritten with pi2_dc_src[0] */
+    q0_16x4 = vset_lane_s16(pi2_dc_src[0], q0_16x4, 0);
+
+    rq1_16x4 = vshr_n_s16(q1_16x4, 1);
+    rq3_16x4 = vshr_n_s16(q3_16x4, 1);
+
+    x0_16x4 = vadd_s16(q0_16x4, q2_16x4);
+    x1_16x4 = vsub_s16(q0_16x4, q2_16x4);
+    x2_16x4 = vsub_s16(rq1_16x4, q3_16x4);
+    x3_16x4 = vadd_s16(q1_16x4, rq3_16x4);
+
+    xx0_16x4 = vadd_s16(x0_16x4, x3_16x4);
+    xx1_16x4 = vadd_s16(x1_16x4, x2_16x4);
+    xx2_16x4 = vsub_s16(x1_16x4, x2_16x4);
+    xx3_16x4 = vsub_s16(x0_16x4, x3_16x4);
+
+    /* row 0 to row 3: 4x4 transpose built from vtrn (16-bit) followed by vzip (32-bit) */
+    xx0_16x4x2 = vtrn_s16(xx0_16x4, xx1_16x4);
+    xx1_16x4x2 = vtrn_s16(xx2_16x4, xx3_16x4);
+    x0_32x2x2 =
+        vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0]));
+    x1_32x2x2 =
+        vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1]));
+
+    x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]);
+    x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]);
+    x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]);
+    x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]);
+
+    /* Store Horz transform output into temp */
+    vst1_s16(pi2_tmp, x0_16x4);
+    vst1_s16(pi2_tmp + 4, x1_16x4);
+    vst1_s16(pi2_tmp + 8, x2_16x4);
+    vst1_s16(pi2_tmp + 12, x3_16x4);
+
+    /* vertical inverse transform */
+    rq1_16x4 = vshr_n_s16(x1_16x4, 1);
+    rq3_16x4 = vshr_n_s16(x3_16x4, 1);
+
+    xx0_16x4 = vadd_s16(x0_16x4, x2_16x4);
+    xx1_16x4 = vsub_s16(x0_16x4, x2_16x4);
+    xx2_16x4 = vsub_s16(rq1_16x4, x3_16x4);
+    xx3_16x4 = vadd_s16(x1_16x4, rq3_16x4);
+
+    x0_16x4 = vadd_s16(xx0_16x4, xx3_16x4);
+    x1_16x4 = vadd_s16(xx1_16x4, xx2_16x4);
+    x2_16x4 = vsub_s16(xx1_16x4, xx2_16x4);
+    x3_16x4 = vsub_s16(xx0_16x4, xx3_16x4);
+
+    x0_16x4 = vrshr_n_s16(x0_16x4, 6);
+    x1_16x4 = vrshr_n_s16(x1_16x4, 6);
+    x2_16x4 = vrshr_n_s16(x2_16x4, 6);
+    x3_16x4 = vrshr_n_s16(x3_16x4, 6);
+    /* Unlike the clip-then-add variants, the transform output is clipped only after accumulation */
+    resd0_in = vld1q_s16((int16_t *) pi2_res_pred);
+    resd1_in = vld1q_s16((int16_t *) pi2_res_pred + i4_res_pred_stride);
+    resd2_in = vld1q_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 2));
+    resd3_in = vld1q_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 3));
+
+    /* Mask alternate values */
+    resd0_in_mask = vbslq_s16(chroma_mask_16x8, resd0_in, resd0_in_mask);
+    resd1_in_mask = vbslq_s16(chroma_mask_16x8, resd1_in, resd1_in_mask);
+    resd2_in_mask = vbslq_s16(chroma_mask_16x8, resd2_in, resd2_in_mask);
+    resd3_in_mask = vbslq_s16(chroma_mask_16x8, resd3_in, resd3_in_mask);
+
+    x0_16x8 = vreinterpretq_s16_s32(vmovl_s16(x0_16x4));
+    x1_16x8 = vreinterpretq_s16_s32(vmovl_s16(x1_16x4));
+    x2_16x8 = vreinterpretq_s16_s32(vmovl_s16(x2_16x4));
+    x3_16x8 = vreinterpretq_s16_s32(vmovl_s16(x3_16x4));
+    /* Accumulate: masked residual prediction + transform output */
+    resd0_in = vaddq_s16(resd0_in_mask, x0_16x8);
+    resd1_in = vaddq_s16(resd1_in_mask, x1_16x8);
+    resd2_in = vaddq_s16(resd2_in_mask, x2_16x8);
+    resd3_in = vaddq_s16(resd3_in_mask, x3_16x8);
+
+    /* Saturate all values < -255 to -255 and retain the rest as it is */
+    resd0_in = vmaxq_s16(resd0_in, neg_255);
+    resd1_in = vmaxq_s16(resd1_in, neg_255);
+    resd2_in = vmaxq_s16(resd2_in, neg_255);
+    resd3_in = vmaxq_s16(resd3_in, neg_255);
+
+    /* Saturate all values > 255 to 255 and retain the rest as it is */
+    resd0_in = vminq_s16(resd0_in, pos_255);
+    resd1_in = vminq_s16(resd1_in, pos_255);
+    resd2_in = vminq_s16(resd2_in, pos_255);
+    resd3_in = vminq_s16(resd3_in, pos_255);
+
+    resout0 = vld1q_s16(pi2_res);
+    resout1 = vld1q_s16(pi2_res + i4_res_stride);
+    resout2 = vld1q_s16(pi2_res + i4_res_stride * 2);
+    resout3 = vld1q_s16(pi2_res + i4_res_stride * 3);
+
+    /* Store res in alternate positions */
+    resout0 = vbslq_s16(chroma_mask_16x8, resd0_in, resout0);
+    resout1 = vbslq_s16(chroma_mask_16x8, resd1_in, resout1);
+    resout2 = vbslq_s16(chroma_mask_16x8, resd2_in, resout2);
+    resout3 = vbslq_s16(chroma_mask_16x8, resd3_in, resout3);
+
+    vst1q_s16(pi2_res, resout0);
+    vst1q_s16(pi2_res + i4_res_stride, resout1);
+    vst1q_s16(pi2_res + (i4_res_stride << 1), resout2);
+    vst1q_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, resout3);
+
+    pred0_in = vld1_u8((uint8_t *) pu1_pred);
+    pred1_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride));
+    pred2_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride << 1));
+    pred3_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride * 3));
+
+    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
+    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
+    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
+    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
+
+    /* Out pixel = pred + res */
+    rec0 = vaddq_s16(pred0, resout0);
+    rec1 = vaddq_s16(pred1, resout1);
+    rec2 = vaddq_s16(pred2, resout2);
+    rec3 = vaddq_s16(pred3, resout3);
+
+    out0 = vld1_u8(pu1_out);
+    out1 = vld1_u8(pu1_out + i4_out_stride);
+    out2 = vld1_u8(pu1_out + i4_out_stride * 2);
+    out3 = vld1_u8(pu1_out + i4_out_stride * 3);
+
+    /* Convert to 8 bit unsigned with saturation */
+    rec0_un = vqmovun_s16(rec0);
+    rec1_un = vqmovun_s16(rec1);
+    rec2_un = vqmovun_s16(rec2);
+    rec3_un = vqmovun_s16(rec3);
+
+    /* Store output pixels in alternate positions */
+    out0 = vbsl_u8(chroma_mask_8x8, rec0_un, out0);
+    out1 = vbsl_u8(chroma_mask_8x8, rec1_un, out1);
+    out2 =
vbsl_u8(chroma_mask_8x8, rec2_un, out2);
+    out3 = vbsl_u8(chroma_mask_8x8, rec3_un, out3);
+
+    vst1_u8((pu1_out), out0);
+    vst1_u8((pu1_out + i4_out_stride), out1);
+    vst1_u8((pu1_out + (i4_out_stride << 1)), out2);
+    vst1_u8((pu1_out + ((i4_out_stride << 1) + i4_out_stride)), out3);
+}
+/*
+ * DC-only 4x4 reconstruction: one value, (dc + 32) >> 6, is added to every prediction sample
+ * and the result is saturated to 8 bits.
+ *
+ * ps_src      : pi2_src[0] is the coefficient used when i4_iq_start_idx is 0
+ * ps_pred     : prediction bytes; 8 bytes are loaded per row, i4_data_stride apart
+ * ps_rec      : reconstruction; 4 bytes are stored per row, i4_data_stride apart
+ * ps_iq_it_res_rec_constants : supplies pu2_iscal_mat, pu2_weigh_mat and u4_qp_div_6
+ * pi2_dc_src  : pi2_dc_src[0] is used unchanged when i4_iq_start_idx is not 0
+ * i4_iq_start_idx : 0 selects pi2_src[0] passed through INV_QUANT
+ * pi2_tmp, ps_res, ps_res_pred, u1_res_accumulate : unused in this variant
+ */
+void isvc_iquant_itrans_recon_4x4_dc_neon(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                          buffer_container_t *ps_res_pred,
+                                          buffer_container_t *ps_res, buffer_container_t *ps_rec,
+                                          iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+                                          WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+                                          WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
+    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
+    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+
+    WORD32 i4_iq_out_temp;
+    int16x8_t temp_0;
+    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
+    int16x8_t pred0, pred1, pred2, pred3;
+
+    UNUSED(pi2_tmp);
+    UNUSED(ps_res);
+    UNUSED(ps_res_pred);
+    UNUSED(u1_res_accumulate);
+    /* NOTE(review): INV_QUANT is a macro defined elsewhere; presumably it updates
+     * i4_iq_out_temp in place - confirm against its definition */
+    if(i4_iq_start_idx == 0)
+    {
+        i4_iq_out_temp = pi2_src[0];
+        INV_QUANT(i4_iq_out_temp, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
+    }
+    else
+    {
+        i4_iq_out_temp = pi2_dc_src[0];
+    }
+    /* Broadcast the rounded DC term to all eight lanes */
+    temp_0 = vdupq_n_s16((i4_iq_out_temp + 32) >> 6);
+
+    pred0_in = vld1_u8(pu1_pred);
+    pu1_pred = pu1_pred + i4_pred_stride;
+    pred1_in = vld1_u8(pu1_pred);
+    pu1_pred = pu1_pred + i4_pred_stride;
+    pred2_in = vld1_u8(pu1_pred);
+    pu1_pred = pu1_pred + i4_pred_stride;
+    pred3_in = vld1_u8(pu1_pred);
+
+    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
+    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
+    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
+    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
+
+    /* Out pixel = Res + pred */
+    pred0 = vaddq_s16(pred0, temp_0);
+    pred1 = vaddq_s16(pred1, temp_0);
+    pred2 = vaddq_s16(pred2, temp_0);
+    pred3 = vaddq_s16(pred3, temp_0);
+
+    /* Convert to unsigned 8 bit with saturation */
+    pred0_in = vqmovun_s16(pred0);
+    pred1_in = vqmovun_s16(pred1);
+    pred2_in = vqmovun_s16(pred2);
+    pred3_in = vqmovun_s16(pred3);
+    /* Only 32-bit lane 0 (the first 4 bytes) of each 8-byte row is stored */
+    vst1_lane_u32((uint32_t *) (pu1_out), vreinterpret_u32_u8(pred0_in), 0);
+    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride), vreinterpret_u32_u8(pred1_in), 0);
+    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride * 2), vreinterpret_u32_u8(pred2_in), 0);
+    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride * 3), vreinterpret_u32_u8(pred3_in), 0);
+}
+/*
+ * DC-only 4x4 reconstruction that also fills the 4x4 residual block in ps_res with a single
+ * value obtained from isvc_get_residue(). The DC term is chosen by i4_iq_start_idx: 0 selects
+ * pi2_src[0] passed through INV_QUANT, any other value selects pi2_dc_src[0] unchanged.
+ * pi2_tmp, ps_res_pred and u1_res_accumulate are unused in this variant.
+ */
+void isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon(
+    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
+    buffer_container_t *ps_res, buffer_container_t *ps_rec,
+    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16
*pi2_dc_src,
+    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
+    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
+    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
+    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
+    WORD32 i4_res_stride = ps_res->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+
+    WORD16 i2_it_out;
+    WORD32 i4_iq_out_temp;
+    int16x8_t temp_0;
+    int16x4_t residue_res;
+    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
+    int16x8_t pred0, pred1, pred2, pred3;
+
+    UNUSED(pi2_tmp);
+    UNUSED(ps_res_pred);
+    UNUSED(u1_res_accumulate);
+    /* NOTE(review): INV_QUANT is a macro defined elsewhere; presumably it updates
+     * i4_iq_out_temp in place - confirm against its definition */
+    if(i4_iq_start_idx == 0)
+    {
+        i4_iq_out_temp = pi2_src[0];
+        INV_QUANT(i4_iq_out_temp, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
+    }
+    else
+    {
+        i4_iq_out_temp = pi2_dc_src[0];
+    }
+    /* The reconstruction below adds i2_it_out itself; only the stored residual goes through
+     * isvc_get_residue() (defined elsewhere) */
+    i2_it_out = ((i4_iq_out_temp + 32) >> 6);
+    temp_0 = vdupq_n_s16(i2_it_out);
+    residue_res = vdup_n_s16(isvc_get_residue(i2_it_out, 0, 0));
+    /* The same four-lane vector is written to each of the four residual rows */
+    vst1_s16(pi2_res, residue_res);
+    vst1_s16(pi2_res + i4_res_stride, residue_res);
+    vst1_s16(pi2_res + (i4_res_stride << 1), residue_res);
+    vst1_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, residue_res);
+
+    pred0_in = vld1_u8(pu1_pred);
+    pu1_pred = pu1_pred + i4_pred_stride;
+    pred1_in = vld1_u8(pu1_pred);
+    pu1_pred = pu1_pred + i4_pred_stride;
+    pred2_in = vld1_u8(pu1_pred);
+    pu1_pred = pu1_pred + i4_pred_stride;
+    pred3_in = vld1_u8(pu1_pred);
+
+    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
+    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
+    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
+    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
+
+    /* Out pixel = Res + pred */
+    pred0 = vaddq_s16(pred0, temp_0);
+    pred1 = vaddq_s16(pred1, temp_0);
+    pred2 = vaddq_s16(pred2, temp_0);
+    pred3 = vaddq_s16(pred3, temp_0);
+
+    /* Convert to unsigned 8 bit with saturation */
+    pred0_in = vqmovun_s16(pred0);
+    pred1_in = vqmovun_s16(pred1);
+    pred2_in = vqmovun_s16(pred2);
+    pred3_in = vqmovun_s16(pred3);
+    /* Only 32-bit lane 0 (the first 4 bytes) of each 8-byte row is stored */
+    vst1_lane_u32((uint32_t *) (pu1_out), vreinterpret_u32_u8(pred0_in), 0);
+    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride), vreinterpret_u32_u8(pred1_in), 0);
+    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride * 2), vreinterpret_u32_u8(pred2_in), 0);
+    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride * 3), vreinterpret_u32_u8(pred3_in), 0);
+}
+/*
+ * DC-only 4x4 reconstruction with residual accumulation: the rounded DC term is added to the
+ * 4x4 residual prediction in ps_res_pred, the sum is clipped to [-255, 255], stored to ps_res
+ * and added to the prediction. The DC term is chosen by i4_iq_start_idx: 0 selects pi2_src[0]
+ * passed through INV_QUANT, any other value selects pi2_dc_src[0] unchanged.
+ * pi2_tmp and u1_res_accumulate are unused in this variant.
+ */
+void isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon(
+    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
+    buffer_container_t *ps_res, buffer_container_t *ps_rec,
+    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
+    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
+    WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data;
+    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
+    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
+    WORD32 i4_res_stride = ps_res->i4_data_stride;
+    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+    WORD16 rnd_fact = (u4_qp_div_6 < 4) ?
1 << (3 - u4_qp_div_6) : 0;
+
+    WORD32 i4_iq_out_temp;
+    int16x4_t temp_0;
+    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
+    int16x8_t pred0, pred1, pred2, pred3;
+    int16x8_t pred01_in, pred23_in;
+    uint8x8_t pred01_un, pred23_un;
+
+    int16x4_t resd0_in, resd1_in, resd2_in, resd3_in;
+    int16x8_t resd01_in, resd23_in;
+    int16x4_t pos_255 = vdup_n_s16(((WORD16) UINT8_MAX));
+    int16x4_t neg_255 = vdup_n_s16(-((WORD16) UINT8_MAX));
+
+    UNUSED(pi2_tmp);
+    UNUSED(u1_res_accumulate);
+    /* NOTE(review): INV_QUANT is a macro defined elsewhere; presumably it updates
+     * i4_iq_out_temp in place - confirm against its definition */
+    if(i4_iq_start_idx == 0)
+    {
+        i4_iq_out_temp = pi2_src[0];
+        INV_QUANT(i4_iq_out_temp, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
+    }
+    else
+    {
+        i4_iq_out_temp = pi2_dc_src[0];
+    }
+    /* Broadcast the rounded DC term to four lanes */
+    temp_0 = vdup_n_s16((i4_iq_out_temp + 32) >> 6);
+
+    resd0_in = vld1_s16((int16_t *) pi2_res_pred);
+    resd1_in = vld1_s16((int16_t *) pi2_res_pred + i4_res_pred_stride);
+    resd2_in = vld1_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 2));
+    resd3_in = vld1_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 3));
+
+    /* Add res pred to the res obtained */
+    resd0_in = vadd_s16(resd0_in, temp_0);
+    resd1_in = vadd_s16(resd1_in, temp_0);
+    resd2_in = vadd_s16(resd2_in, temp_0);
+    resd3_in = vadd_s16(resd3_in, temp_0);
+
+    /* Saturate all values < -255 to -255 and retain the rest as it is */
+    resd0_in = vmax_s16(resd0_in, neg_255);
+    resd1_in = vmax_s16(resd1_in, neg_255);
+    resd2_in = vmax_s16(resd2_in, neg_255);
+    resd3_in = vmax_s16(resd3_in, neg_255);
+
+    /* Saturate all values > 255 to 255 and retain the rest as it is */
+    resd0_in = vmin_s16(resd0_in, pos_255);
+    resd1_in = vmin_s16(resd1_in, pos_255);
+    resd2_in = vmin_s16(resd2_in, pos_255);
+    resd3_in = vmin_s16(resd3_in, pos_255);
+    /* The clipped sum is both the stored residual and the value added to the prediction */
+    vst1_s16(pi2_res, resd0_in);
+    vst1_s16(pi2_res + i4_res_stride, resd1_in);
+    vst1_s16(pi2_res + (i4_res_stride << 1), resd2_in);
+    vst1_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, resd3_in);
+
+    resd01_in = vcombine_s16(resd0_in, resd1_in);
+    resd23_in = vcombine_s16(resd2_in, resd3_in);
+
+    pred0_in = vld1_u8(pu1_pred);
+    pu1_pred = pu1_pred + i4_pred_stride;
+    pred1_in = vld1_u8(pu1_pred);
+    pu1_pred = pu1_pred + i4_pred_stride;
+    pred2_in = vld1_u8(pu1_pred);
+    pu1_pred = pu1_pred + i4_pred_stride;
+    pred3_in = vld1_u8(pu1_pred);
+
+    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
+    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
+    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
+    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
+    /* Pack the first four samples of two rows into one vector: rows 0 and 1, then rows 2 and 3 */
+    pred01_in = vcombine_s16(vget_low_s16(pred0), vget_low_s16(pred1));
+    pred23_in = vcombine_s16(vget_low_s16(pred2), vget_low_s16(pred3));
+
+    /* Out pixel = Res + pred */
+    pred01_in = vaddq_s16(pred01_in, resd01_in);
+    pred23_in = vaddq_s16(pred23_in, resd23_in);
+
+    /* Convert to unsigned 8 bit with saturation */
+    pred01_un = vqmovun_s16(pred01_in);
+    pred23_un = vqmovun_s16(pred23_in);
+    /* Each packed vector holds two rows: 32-bit lane 0 is the first row, lane 1 the second */
+    vst1_lane_u32((uint32_t *) (pu1_out), vreinterpret_u32_u8(pred01_un), 0);
+    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride), vreinterpret_u32_u8(pred01_un), 1);
+    vst1_lane_u32((uint32_t *) (pu1_out + (i4_out_stride << 1)), vreinterpret_u32_u8(pred23_un), 0);
+    vst1_lane_u32((uint32_t *) (pu1_out + ((i4_out_stride << 1) + i4_out_stride)),
+                  vreinterpret_u32_u8(pred23_un), 1);
+}
+/*
+ * DC-only chroma 4x4 reconstruction: (pi2_dc_src[0] + 32) >> 6 is added to the prediction
+ * and the saturated result is blended into the output rows through chroma_mask_8x8, so only
+ * the masked bytes change. No inverse quantisation is done here: pi2_src, the scaling
+ * matrices and u4_qp_div_6 are fetched but marked unused, as are pi2_tmp, i4_iq_start_idx,
+ * ps_res, ps_res_pred and u1_res_accumulate.
+ */
+void isvc_iquant_itrans_recon_chroma_4x4_dc_neon(
+    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
+    buffer_container_t *ps_res, buffer_container_t *ps_rec,
+    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
+    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
+    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat =
ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + + WORD32 i4_iq_out_temp; + int16x8_t temp_0; + uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in; + int16x8_t pred0, pred1, pred2, pred3; + uint8x8_t i4_out_horz_8x8_r0, i4_out_horz_8x8_r1, i4_out_horz_8x8_r2, i4_out_horz_8x8_r3; + uint8x8_t chroma_mask_8x8 = vreinterpret_u8_u16(vdup_n_u16(0x00ff)); + + UNUSED(pi2_src); + UNUSED(pu2_iscal_mat); + UNUSED(pu2_weigh_mat); + UNUSED(u4_qp_div_6); + UNUSED(pi2_tmp); + UNUSED(i4_iq_start_idx); + UNUSED(ps_res); + UNUSED(ps_res_pred); + UNUSED(u1_res_accumulate); + + i4_iq_out_temp = pi2_dc_src[0]; + temp_0 = vdupq_n_s16((i4_iq_out_temp + 32) >> 6); + + pred0_in = vld1_u8(pu1_pred); + pu1_pred = pu1_pred + i4_pred_stride; + pred1_in = vld1_u8(pu1_pred); + pu1_pred = pu1_pred + i4_pred_stride; + pred2_in = vld1_u8(pu1_pred); + pu1_pred = pu1_pred + i4_pred_stride; + pred3_in = vld1_u8(pu1_pred); + + pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in)); + pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in)); + pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in)); + pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in)); + + /* Out pixel = Res + pred */ + pred0 = vaddq_s16(pred0, temp_0); + pred1 = vaddq_s16(pred1, temp_0); + pred2 = vaddq_s16(pred2, temp_0); + pred3 = vaddq_s16(pred3, temp_0); + + /* Convert to unsigned 8 bit with saturation */ + pred0_in = vqmovun_s16(pred0); + pred1_in = vqmovun_s16(pred1); + pred2_in = vqmovun_s16(pred2); + pred3_in = vqmovun_s16(pred3); + + i4_out_horz_8x8_r0 = vld1_u8(pu1_out); + i4_out_horz_8x8_r1 = vld1_u8(pu1_out + i4_out_stride); + i4_out_horz_8x8_r2 = vld1_u8(pu1_out + i4_out_stride * 2); + i4_out_horz_8x8_r3 = vld1_u8(pu1_out + i4_out_stride * 3); + + /* Store out pixels in alternate positions */ + i4_out_horz_8x8_r0 = vbsl_u8(chroma_mask_8x8, pred0_in, i4_out_horz_8x8_r0); + i4_out_horz_8x8_r1 = vbsl_u8(chroma_mask_8x8, pred1_in, i4_out_horz_8x8_r1); + i4_out_horz_8x8_r2 = 
vbsl_u8(chroma_mask_8x8, pred2_in, i4_out_horz_8x8_r2); + i4_out_horz_8x8_r3 = vbsl_u8(chroma_mask_8x8, pred3_in, i4_out_horz_8x8_r3); + + vst1_u8((uint8_t *) (pu1_out), i4_out_horz_8x8_r0); + vst1_u8((uint8_t *) (pu1_out + i4_out_stride), i4_out_horz_8x8_r1); + vst1_u8((uint8_t *) (pu1_out + i4_out_stride * 2), i4_out_horz_8x8_r2); + vst1_u8((uint8_t *) (pu1_out + i4_out_stride * 3), i4_out_horz_8x8_r3); +} + +void isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + + WORD16 i2_it_out; + WORD32 i4_iq_out_temp; + int16x8_t temp_0, residue_res; + uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in; + int16x8_t pred0, pred1, pred2, pred3; + int16x8_t resout0, resout1, resout2, resout3; + + uint8x8_t i4_out_horz_8x8_r0, i4_out_horz_8x8_r1, i4_out_horz_8x8_r2, i4_out_horz_8x8_r3; + uint8x8_t chroma_mask_8x8 = vreinterpret_u8_u16(vdup_n_u16(0x00ff)); + uint16x8_t chroma_mask_16x8 = {0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000}; + + UNUSED(pi2_src); + UNUSED(pu2_iscal_mat); + UNUSED(pu2_weigh_mat); + UNUSED(u4_qp_div_6); + UNUSED(pi2_tmp); + UNUSED(i4_iq_start_idx); + UNUSED(ps_res_pred); + 
UNUSED(u1_res_accumulate); + + i4_iq_out_temp = pi2_dc_src[0]; + + i2_it_out = ((i4_iq_out_temp + 32) >> 6); + temp_0 = vdupq_n_s16(i2_it_out); + residue_res = vdupq_n_s16(isvc_get_residue(i2_it_out, 0, 0)); + + resout0 = vld1q_s16(pi2_res); + resout1 = vld1q_s16(pi2_res + i4_res_stride); + resout2 = vld1q_s16(pi2_res + i4_res_stride * 2); + resout3 = vld1q_s16(pi2_res + i4_res_stride * 3); + + /* Store res in alternate positions */ + resout0 = vbslq_s16(chroma_mask_16x8, residue_res, resout0); + resout1 = vbslq_s16(chroma_mask_16x8, residue_res, resout1); + resout2 = vbslq_s16(chroma_mask_16x8, residue_res, resout2); + resout3 = vbslq_s16(chroma_mask_16x8, residue_res, resout3); + + vst1q_s16(pi2_res, resout0); + vst1q_s16(pi2_res + i4_res_stride, resout1); + vst1q_s16(pi2_res + (i4_res_stride << 1), resout2); + vst1q_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, resout3); + + pred0_in = vld1_u8(pu1_pred); + pu1_pred = pu1_pred + i4_pred_stride; + pred1_in = vld1_u8(pu1_pred); + pu1_pred = pu1_pred + i4_pred_stride; + pred2_in = vld1_u8(pu1_pred); + pu1_pred = pu1_pred + i4_pred_stride; + pred3_in = vld1_u8(pu1_pred); + + pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in)); + pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in)); + pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in)); + pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in)); + + /* Out pixel = Res + pred */ + pred0 = vaddq_s16(pred0, temp_0); + pred1 = vaddq_s16(pred1, temp_0); + pred2 = vaddq_s16(pred2, temp_0); + pred3 = vaddq_s16(pred3, temp_0); + + /* Convert to unsigned 8 bit with saturation */ + pred0_in = vqmovun_s16(pred0); + pred1_in = vqmovun_s16(pred1); + pred2_in = vqmovun_s16(pred2); + pred3_in = vqmovun_s16(pred3); + + /* Store out pixels in alternate positions */ + i4_out_horz_8x8_r0 = vld1_u8(pu1_out); + i4_out_horz_8x8_r1 = vld1_u8(pu1_out + i4_out_stride); + i4_out_horz_8x8_r2 = vld1_u8(pu1_out + i4_out_stride * 2); + i4_out_horz_8x8_r3 = vld1_u8(pu1_out + i4_out_stride * 3); + + 
i4_out_horz_8x8_r0 = vbsl_u8(chroma_mask_8x8, pred0_in, i4_out_horz_8x8_r0); + i4_out_horz_8x8_r1 = vbsl_u8(chroma_mask_8x8, pred1_in, i4_out_horz_8x8_r1); + i4_out_horz_8x8_r2 = vbsl_u8(chroma_mask_8x8, pred2_in, i4_out_horz_8x8_r2); + i4_out_horz_8x8_r3 = vbsl_u8(chroma_mask_8x8, pred3_in, i4_out_horz_8x8_r3); + + vst1_u8((uint8_t *) (pu1_out), i4_out_horz_8x8_r0); + vst1_u8((uint8_t *) (pu1_out + i4_out_stride), i4_out_horz_8x8_r1); + vst1_u8((uint8_t *) (pu1_out + i4_out_stride * 2), i4_out_horz_8x8_r2); + vst1_u8((uint8_t *) (pu1_out + i4_out_stride * 3), i4_out_horz_8x8_r3); +} + +void isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + + WORD32 i4_iq_out_temp; + int16x8_t temp_0; + uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in; + int16x8_t pred0, pred1, pred2, pred3; + int16x8_t resd0_in, resd1_in, resd2_in, resd3_in; + int16x8_t resout0, resout1, resout2, resout3; + int16x8_t resd1_in_mask, resd2_in_mask, resd3_in_mask; + uint8x8_t out0, out1, out2, out3; + int16x8_t pos_255 = 
vdupq_n_s16(((WORD16) UINT8_MAX)); + int16x8_t neg_255 = vdupq_n_s16(-((WORD16) UINT8_MAX)); + uint8x8_t chroma_mask_8x8 = vreinterpret_u8_u16(vdup_n_u16(0x00ff)); + uint16x8_t chroma_mask_16x8 = {0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000}; + + int16x8_t resd0_in_mask = {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}; + + UNUSED(pi2_src); + UNUSED(pu2_iscal_mat); + UNUSED(pu2_weigh_mat); + UNUSED(u4_qp_div_6); + UNUSED(pi2_tmp); + UNUSED(i4_iq_start_idx); + UNUSED(u1_res_accumulate); + + resd1_in_mask = resd0_in_mask; + resd2_in_mask = resd0_in_mask; + resd3_in_mask = resd0_in_mask; + + i4_iq_out_temp = pi2_dc_src[0]; + temp_0 = vdupq_n_s16((i4_iq_out_temp + 32) >> 6); + + resd0_in = vld1q_s16((int16_t *) pi2_res_pred); + resd1_in = vld1q_s16((int16_t *) pi2_res_pred + i4_res_pred_stride); + resd2_in = vld1q_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 2)); + resd3_in = vld1q_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 3)); + + /* Mask alternate values of res pred */ + resd0_in_mask = vbslq_s16(chroma_mask_16x8, resd0_in, resd0_in_mask); + resd1_in_mask = vbslq_s16(chroma_mask_16x8, resd1_in, resd1_in_mask); + resd2_in_mask = vbslq_s16(chroma_mask_16x8, resd2_in, resd2_in_mask); + resd3_in_mask = vbslq_s16(chroma_mask_16x8, resd3_in, resd3_in_mask); + + /* Add res pred to res obtained */ + resd0_in = vaddq_s16(resd0_in_mask, temp_0); + resd1_in = vaddq_s16(resd1_in_mask, temp_0); + resd2_in = vaddq_s16(resd2_in_mask, temp_0); + resd3_in = vaddq_s16(resd3_in_mask, temp_0); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + resd0_in = vmaxq_s16(resd0_in, neg_255); + resd1_in = vmaxq_s16(resd1_in, neg_255); + resd2_in = vmaxq_s16(resd2_in, neg_255); + resd3_in = vmaxq_s16(resd3_in, neg_255); + + /* Saturate all values > 255 to 255 and retain the rest as it is */ + resd0_in = vminq_s16(resd0_in, pos_255); + resd1_in = vminq_s16(resd1_in, pos_255); + resd2_in = vminq_s16(resd2_in, pos_255); 
+ resd3_in = vminq_s16(resd3_in, pos_255); + + resout0 = vld1q_s16(pi2_res); + resout1 = vld1q_s16(pi2_res + i4_res_stride); + resout2 = vld1q_s16(pi2_res + i4_res_stride * 2); + resout3 = vld1q_s16(pi2_res + i4_res_stride * 3); + + /* Store res in alternate positions */ + resout0 = vbslq_s16(chroma_mask_16x8, resd0_in, resout0); + resout1 = vbslq_s16(chroma_mask_16x8, resd1_in, resout1); + resout2 = vbslq_s16(chroma_mask_16x8, resd2_in, resout2); + resout3 = vbslq_s16(chroma_mask_16x8, resd3_in, resout3); + + vst1q_s16(pi2_res, resout0); + vst1q_s16(pi2_res + i4_res_stride, resout1); + vst1q_s16(pi2_res + (i4_res_stride << 1), resout2); + vst1q_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, resout3); + + pred0_in = vld1_u8(pu1_pred); + pu1_pred = pu1_pred + i4_pred_stride; + pred1_in = vld1_u8(pu1_pred); + pu1_pred = pu1_pred + i4_pred_stride; + pred2_in = vld1_u8(pu1_pred); + pu1_pred = pu1_pred + i4_pred_stride; + pred3_in = vld1_u8(pu1_pred); + + pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in)); + pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in)); + pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in)); + pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in)); + + /* Out pixel = Res + pred */ + pred0 = vaddq_s16(pred0, resout0); + pred1 = vaddq_s16(pred1, resout1); + pred2 = vaddq_s16(pred2, resout2); + pred3 = vaddq_s16(pred3, resout3); + + /* Convert to unsigned 8 bit with saturation */ + pred0_in = vqmovun_s16(pred0); + pred1_in = vqmovun_s16(pred1); + pred2_in = vqmovun_s16(pred2); + pred3_in = vqmovun_s16(pred3); + + out0 = vld1_u8(pu1_out); + out1 = vld1_u8(pu1_out + i4_out_stride); + out2 = vld1_u8(pu1_out + i4_out_stride * 2); + out3 = vld1_u8(pu1_out + i4_out_stride * 3); + + /* Store out pixels in alternate positions */ + out0 = vbsl_u8(chroma_mask_8x8, pred0_in, out0); + out1 = vbsl_u8(chroma_mask_8x8, pred1_in, out1); + out2 = vbsl_u8(chroma_mask_8x8, pred2_in, out2); + out3 = vbsl_u8(chroma_mask_8x8, pred3_in, out3); + + vst1_u8((uint8_t *) (pu1_out), 
out0); + vst1_u8((uint8_t *) (pu1_out + i4_out_stride), out1); + vst1_u8((uint8_t *) (pu1_out + i4_out_stride * 2), out2); + vst1_u8((uint8_t *) (pu1_out + i4_out_stride * 3), out3); +} diff --git a/common/arm/svc/isvc_mem_fns_neon.c b/common/arm/svc/isvc_mem_fns_neon.c new file mode 100644 index 0000000..f2cf448 --- /dev/null +++ b/common/arm/svc/isvc_mem_fns_neon.c @@ -0,0 +1,151 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +/** + * ******************************************************************************* + * * @file + * isvc_mem_fns_av8.c + * + * @brief + * armv8 variants of + * functions used for memory operations + * + * ******************************************************************************* + */ +#include +#include + +#include "ih264_typedefs.h" +#include "isvc_mem_fns.h" + +void isvc_memset_2d_neon(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd, + WORD32 i4_blk_ht) +{ + if(i4_blk_wd == 4) + { + vst1_lane_u32((UWORD32 *) pu1_dst, vreinterpret_u32_u8(vdup_n_u8(u1_val)), 0); + pu1_dst += i4_dst_stride; + + vst1_lane_u32((UWORD32 *) pu1_dst, vreinterpret_u32_u8(vdup_n_u8(u1_val)), 0); + pu1_dst += i4_dst_stride; + + vst1_lane_u32((UWORD32 *) pu1_dst, vreinterpret_u32_u8(vdup_n_u8(u1_val)), 0); + pu1_dst += i4_dst_stride; + + vst1_lane_u32((UWORD32 *) pu1_dst, vreinterpret_u32_u8(vdup_n_u8(u1_val)), 0); + } + else if(i4_blk_wd == 8) + { + vst1_u8(pu1_dst, vdup_n_u8(u1_val)); + pu1_dst += i4_dst_stride; + + vst1_u8(pu1_dst, vdup_n_u8(u1_val)); + pu1_dst += i4_dst_stride; + + vst1_u8(pu1_dst, vdup_n_u8(u1_val)); + pu1_dst += i4_dst_stride; + + vst1_u8(pu1_dst, vdup_n_u8(u1_val)); + pu1_dst += i4_dst_stride; + + vst1_u8(pu1_dst, vdup_n_u8(u1_val)); + pu1_dst += i4_dst_stride; + + vst1_u8(pu1_dst, vdup_n_u8(u1_val)); + pu1_dst += i4_dst_stride; + + vst1_u8(pu1_dst, vdup_n_u8(u1_val)); + pu1_dst += i4_dst_stride; + + vst1_u8(pu1_dst, vdup_n_u8(u1_val)); + } + else if((i4_blk_wd % 16 == 0) && (i4_blk_ht % 16 == 0)) + { + WORD32 i, j; + UWORD8 *pu1_dst_col_ptr, *pu1_dst_row_ptr; + WORD32 i4_width_by_16 = i4_blk_wd / 16; + WORD32 i4_height_by_16 = i4_blk_ht / 16; + + for(i = 0; i < i4_height_by_16; i++) + { + pu1_dst_row_ptr = pu1_dst + i * 16 * i4_dst_stride; + for(j = 0; j < i4_width_by_16; j++) + { + pu1_dst_col_ptr = pu1_dst_row_ptr + (j << 4); + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += 
i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + vst1q_u8(&pu1_dst_col_ptr[0], vdupq_n_u8(u1_val)); + } + } + } + else + { + WORD32 i; + + for(i = 0; i < i4_blk_ht; i++) + { + memset(pu1_dst, u1_val, i4_blk_wd); + pu1_dst += i4_dst_stride; + } + } +} diff --git a/common/arm/svc/isvc_resi_trans_quant_neon.c b/common/arm/svc/isvc_resi_trans_quant_neon.c new file mode 100644 index 0000000..4179cb2 --- /dev/null +++ b/common/arm/svc/isvc_resi_trans_quant_neon.c @@ -0,0 +1,1085 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** + * ******************************************************************************* + * * @file + * isvc_resi_trans_quant_neon.c + * + * @brief + * neon variants of forward transform and quantization functions + * + * ******************************************************************************* + */ + +#include +#include + +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" + +void isvc_resi_trans_quant_4x4_neon(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_out, + buffer_container_t *ps_upsampled_res, + resi_trans_quant_constants_t *ps_quant_constants, + UWORD8 *pu1_nnz, WORD16 *pi2_dc_out, + UWORD8 u1_use_upsampled_res) +{ + UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + WORD16 *pi2_out = (WORD16 *) ps_out->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_out->i4_data_stride; + const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 
u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + + uint8x8_t src0, src1, src2, src3; + uint8x8_t pred0, pred1, pred2, pred3; + uint8x8_t temp0_u8x8, temp1_u8x8; + uint16x4_t temp0_u16x4, temp1_u16x4, temp2_u16x4, temp3_u16x4; + uint16x4_t scale_mat0_16x4, scale_mat1_16x4, scale_mat2_16x4, scale_mat3_16x4; + uint16x4_t threshold0_16x4, threshold1_16x4, threshold2_16x4, threshold3_16x4; + uint16x4_t thresholdmask0_16x4, thresholdmask1_16x4, thresholdmask2_16x4, thresholdmask3_16x4; + int16x4_t res0_16x4, res1_16x4, res2_16x4, res3_16x4; + int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4; + int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4; + int16x4x2_t xx0_16x4x2, xx1_16x4x2; + int16x4_t temp0_16x4, temp1_16x4, temp2_16x4, temp3_16x4; + uint16x8_t res0_16x8, res1_16x8, res2_16x8, res3_16x8; + uint16x8_t temp0_u16x8, temp1_u16x8; + int32x2x2_t x0_32x2x2, x1_32x2x2; + int32x4_t tx0_32x4, tx1_32x4, tx2_32x4, tx3_32x4; + + int32x4_t rnd_factor_32x4 = vdupq_n_s32(u4_round_factor); + int32x4_t qbits_32x4 = vdupq_n_s32(u4_qbits); + int16x4_t zeros_16x4 = vdup_n_s16(0); + + UNUSED(ps_upsampled_res); + UNUSED(u1_use_upsampled_res); + + threshold0_16x4 = vld1_u16(pu2_threshold_matrix); + threshold1_16x4 = vld1_u16(pu2_threshold_matrix + 4); + threshold2_16x4 = vld1_u16(pu2_threshold_matrix + 8); + threshold3_16x4 = vld1_u16(pu2_threshold_matrix + 12); + + scale_mat0_16x4 = vld1_u16(pu2_scale_matrix); + scale_mat1_16x4 = vld1_u16(pu2_scale_matrix + 4); + scale_mat2_16x4 = vld1_u16(pu2_scale_matrix + 8); + scale_mat3_16x4 = vld1_u16(pu2_scale_matrix + 12); + + src0 = vld1_u8(&pu1_src[0 * i4_src_stride]); + src1 = vld1_u8(&pu1_src[1 * i4_src_stride]); + src2 = vld1_u8(&pu1_src[2 * i4_src_stride]); + src3 = vld1_u8(&pu1_src[3 * i4_src_stride]); + + pred0 = vld1_u8(&pu1_pred[0 * i4_pred_stride]); + pred1 = vld1_u8(&pu1_pred[1 * i4_pred_stride]); + pred2 = vld1_u8(&pu1_pred[2 * i4_pred_stride]); + pred3 = vld1_u8(&pu1_pred[3 * 
i4_pred_stride]); + + /* calculate res = src - pred */ + res0_16x8 = vsubl_u8(src0, pred0); + res1_16x8 = vsubl_u8(src1, pred1); + res2_16x8 = vsubl_u8(src2, pred2); + res3_16x8 = vsubl_u8(src3, pred3); + + res0_16x4 = vreinterpret_s16_u16(vget_low_u16(res0_16x8)); + res1_16x4 = vreinterpret_s16_u16(vget_low_u16(res1_16x8)); + res2_16x4 = vreinterpret_s16_u16(vget_low_u16(res2_16x8)); + res3_16x4 = vreinterpret_s16_u16(vget_low_u16(res3_16x8)); + + /* Perform Forward transform */ + /*-------------------------------------------------------------*/ + /* DCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + /* Matrix transpose */ + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + + xx0_16x4x2 = vtrn_s16(res0_16x4, res1_16x4); + xx1_16x4x2 = vtrn_s16(res2_16x4, res3_16x4); + x0_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + xx0_16x4 = vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx3_16x4, 1); + x1_16x4 = vadd_s16(xx2_16x4, temp0_16x4); + + x2_16x4 = vsub_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx2_16x4, 1); + x3_16x4 = vsub_s16(xx3_16x4, temp0_16x4); + + /* Matrix transpose */ + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + + xx0_16x4x2 = vtrn_s16(x0_16x4, x1_16x4); + xx1_16x4x2 = vtrn_s16(x2_16x4, x3_16x4); + x0_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), 
vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + /* Vertical Transformation */ + + xx0_16x4 = vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx3_16x4, 1); + x1_16x4 = vadd_s16(temp0_16x4, xx2_16x4); + + x2_16x4 = vsub_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx2_16x4, 1); + x3_16x4 = vsub_s16(xx3_16x4, temp0_16x4); + + /* get the first 16 bits from the register */ + *pi2_dc_out = vget_lane_s16(x0_16x4, 0); + + xx0_16x4 = vabs_s16(x0_16x4); + xx1_16x4 = vabs_s16(x1_16x4); + xx2_16x4 = vabs_s16(x2_16x4); + xx3_16x4 = vabs_s16(x3_16x4); + + /* compare with zero for getting sign */ + temp0_u16x4 = vcgt_s16(x0_16x4, zeros_16x4); + temp1_u16x4 = vcgt_s16(x1_16x4, zeros_16x4); + temp2_u16x4 = vcgt_s16(x2_16x4, zeros_16x4); + temp3_u16x4 = vcgt_s16(x3_16x4, zeros_16x4); + + /* compare with zero for thresholding */ + thresholdmask0_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold0_16x4), xx0_16x4); + thresholdmask1_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold1_16x4), xx1_16x4); + thresholdmask2_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold2_16x4), xx2_16x4); + thresholdmask3_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold3_16x4), xx3_16x4); + + /* Multiply abs values obtained with scaling matrix */ + tx0_32x4 = vmull_s16(xx0_16x4, vreinterpret_s16_u16(scale_mat0_16x4)); + tx1_32x4 = vmull_s16(xx1_16x4, vreinterpret_s16_u16(scale_mat1_16x4)); + tx2_32x4 = vmull_s16(xx2_16x4, vreinterpret_s16_u16(scale_mat2_16x4)); + tx3_32x4 = vmull_s16(xx3_16x4, vreinterpret_s16_u16(scale_mat3_16x4)); 
+ + tx0_32x4 = vaddq_s32(tx0_32x4, rnd_factor_32x4); + tx1_32x4 = vaddq_s32(tx1_32x4, rnd_factor_32x4); + tx2_32x4 = vaddq_s32(tx2_32x4, rnd_factor_32x4); + tx3_32x4 = vaddq_s32(tx3_32x4, rnd_factor_32x4); + + qbits_32x4 = vnegq_s32(qbits_32x4); + + tx0_32x4 = vshlq_s32(tx0_32x4, qbits_32x4); + tx1_32x4 = vshlq_s32(tx1_32x4, qbits_32x4); + tx2_32x4 = vshlq_s32(tx2_32x4, qbits_32x4); + tx3_32x4 = vshlq_s32(tx3_32x4, qbits_32x4); + + /* Convertion to 16 bits signed */ + temp0_16x4 = vmovn_s32(tx0_32x4); + temp1_16x4 = vmovn_s32(tx1_32x4); + temp2_16x4 = vmovn_s32(tx2_32x4); + temp3_16x4 = vmovn_s32(tx3_32x4); + + x0_16x4 = vneg_s16(temp0_16x4); + x1_16x4 = vneg_s16(temp1_16x4); + x2_16x4 = vneg_s16(temp2_16x4); + x3_16x4 = vneg_s16(temp3_16x4); + + /* Restore sign */ + x0_16x4 = vbsl_s16(temp0_u16x4, temp0_16x4, x0_16x4); + x1_16x4 = vbsl_s16(temp1_u16x4, temp1_16x4, x1_16x4); + x2_16x4 = vbsl_s16(temp2_u16x4, temp2_16x4, x2_16x4); + x3_16x4 = vbsl_s16(temp3_u16x4, temp3_16x4, x3_16x4); + + xx0_16x4 = vbsl_s16(thresholdmask0_16x4, zeros_16x4, x0_16x4); + xx1_16x4 = vbsl_s16(thresholdmask1_16x4, zeros_16x4, x1_16x4); + xx2_16x4 = vbsl_s16(thresholdmask2_16x4, zeros_16x4, x2_16x4); + xx3_16x4 = vbsl_s16(thresholdmask3_16x4, zeros_16x4, x3_16x4); + + /* Store Quantized outputs */ + vst1_s16(&pi2_out[0 * i4_out_stride], xx0_16x4); + vst1_s16(&pi2_out[1 * i4_out_stride], xx1_16x4); + vst1_s16(&pi2_out[2 * i4_out_stride], xx2_16x4); + vst1_s16(&pi2_out[3 * i4_out_stride], xx3_16x4); + + /* NNZ calculation */ + + temp0_u16x4 = vceq_s16(xx0_16x4, zeros_16x4); + temp1_u16x4 = vceq_s16(xx1_16x4, zeros_16x4); + temp2_u16x4 = vceq_s16(xx2_16x4, zeros_16x4); + temp3_u16x4 = vceq_s16(xx3_16x4, zeros_16x4); + + temp0_u16x8 = vcombine_u16(temp0_u16x4, temp2_u16x4); + temp1_u16x8 = vcombine_u16(temp1_u16x4, temp3_u16x4); + + /* Convertion to 8 bit unsigned */ + temp0_u8x8 = vmovn_u16(temp0_u16x8); + temp1_u8x8 = vmovn_u16(temp1_u16x8); + + temp0_u8x8 = vshr_n_u8(temp0_u8x8, 7); + 
temp1_u8x8 = vshr_n_u8(temp1_u8x8, 7); + + temp0_u8x8 = vadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + + *pu1_nnz = 16 - vget_lane_u8(temp0_u8x8, 0); +} + +void isvc_resi_trans_quant_4x4_with_residual_sub_neon( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_out, + buffer_container_t *ps_upsampled_res, resi_trans_quant_constants_t *ps_quant_constants, + UWORD8 *pu1_nnz, WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res) +{ + UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + WORD16 *pi2_out = (WORD16 *) ps_out->pv_data; + WORD16 *pi2_upsampled_res = ps_upsampled_res ? (WORD16 *) ps_upsampled_res->pv_data : NULL; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_out->i4_data_stride; + WORD32 i4_upsampled_res_stride = ps_upsampled_res ? 
ps_upsampled_res->i4_data_stride : 0; + const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + + uint8x8_t src0, src1, src2, src3; + uint8x8_t pred0, pred1, pred2, pred3; + uint8x8_t temp0_u8x8, temp1_u8x8; + uint16x4_t temp0_u16x4, temp1_u16x4, temp2_u16x4, temp3_u16x4; + uint16x4_t scale_mat0_16x4, scale_mat1_16x4, scale_mat2_16x4, scale_mat3_16x4; + uint16x4_t threshold0_16x4, threshold1_16x4, threshold2_16x4, threshold3_16x4; + uint16x4_t thresholdmask0_16x4, thresholdmask1_16x4, thresholdmask2_16x4, thresholdmask3_16x4; + int16x4_t upres0_16x4, upres1_16x4, upres2_16x4, upres3_16x4; + int16x4_t res0_16x4, res1_16x4, res2_16x4, res3_16x4; + int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4; + int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4; + int16x4x2_t xx0_16x4x2, xx1_16x4x2; + int16x4_t temp0_16x4, temp1_16x4, temp2_16x4, temp3_16x4; + uint16x8_t res0_16x8, res1_16x8, res2_16x8, res3_16x8; + uint16x8_t temp0_u16x8, temp1_u16x8; + int32x2x2_t x0_32x2x2, x1_32x2x2; + int32x4_t tx0_32x4, tx1_32x4, tx2_32x4, tx3_32x4; + + int32x4_t rnd_factor_32x4 = vdupq_n_s32(u4_round_factor); + int32x4_t qbits_32x4 = vdupq_n_s32(u4_qbits); + int16x4_t zeros_16x4 = vdup_n_s16(0); + int16x4_t pos_255_16x4 = vdup_n_s16(((WORD16) UINT8_MAX)); + int16x4_t neg_255_16x4 = vdup_n_s16(-((WORD16) UINT8_MAX)); + + UNUSED(u1_use_upsampled_res); + + threshold0_16x4 = vld1_u16(pu2_threshold_matrix); + threshold1_16x4 = vld1_u16(pu2_threshold_matrix + 4); + threshold2_16x4 = vld1_u16(pu2_threshold_matrix + 8); + threshold3_16x4 = vld1_u16(pu2_threshold_matrix + 12); + + scale_mat0_16x4 = vld1_u16(pu2_scale_matrix); + scale_mat1_16x4 = vld1_u16(pu2_scale_matrix + 4); + scale_mat2_16x4 = vld1_u16(pu2_scale_matrix + 8); + scale_mat3_16x4 = vld1_u16(pu2_scale_matrix + 12); + + src0 = 
vld1_u8(&pu1_src[0 * i4_src_stride]); + src1 = vld1_u8(&pu1_src[1 * i4_src_stride]); + src2 = vld1_u8(&pu1_src[2 * i4_src_stride]); + src3 = vld1_u8(&pu1_src[3 * i4_src_stride]); + + pred0 = vld1_u8(&pu1_pred[0 * i4_pred_stride]); + pred1 = vld1_u8(&pu1_pred[1 * i4_pred_stride]); + pred2 = vld1_u8(&pu1_pred[2 * i4_pred_stride]); + pred3 = vld1_u8(&pu1_pred[3 * i4_pred_stride]); + + /* calculate res = src - pred */ + res0_16x8 = vsubl_u8(src0, pred0); + res1_16x8 = vsubl_u8(src1, pred1); + res2_16x8 = vsubl_u8(src2, pred2); + res3_16x8 = vsubl_u8(src3, pred3); + + res0_16x4 = vreinterpret_s16_u16(vget_low_u16(res0_16x8)); + res1_16x4 = vreinterpret_s16_u16(vget_low_u16(res1_16x8)); + res2_16x4 = vreinterpret_s16_u16(vget_low_u16(res2_16x8)); + res3_16x4 = vreinterpret_s16_u16(vget_low_u16(res3_16x8)); + + /* Load upsampled res */ + upres0_16x4 = vld1_s16(&pi2_upsampled_res[0 * i4_upsampled_res_stride]); + upres1_16x4 = vld1_s16(&pi2_upsampled_res[1 * i4_upsampled_res_stride]); + upres2_16x4 = vld1_s16(&pi2_upsampled_res[2 * i4_upsampled_res_stride]); + upres3_16x4 = vld1_s16(&pi2_upsampled_res[3 * i4_upsampled_res_stride]); + + /* subtract upsampled res from (src - pred) to obtain final res */ + res0_16x4 = vsub_s16(res0_16x4, upres0_16x4); + res1_16x4 = vsub_s16(res1_16x4, upres1_16x4); + res2_16x4 = vsub_s16(res2_16x4, upres2_16x4); + res3_16x4 = vsub_s16(res3_16x4, upres3_16x4); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + res0_16x4 = vmax_s16(res0_16x4, neg_255_16x4); + res1_16x4 = vmax_s16(res1_16x4, neg_255_16x4); + res2_16x4 = vmax_s16(res2_16x4, neg_255_16x4); + res3_16x4 = vmax_s16(res3_16x4, neg_255_16x4); + + /* Saturate all values > 255 to 255 and retain the rest as it is */ + res0_16x4 = vmin_s16(res0_16x4, pos_255_16x4); + res1_16x4 = vmin_s16(res1_16x4, pos_255_16x4); + res2_16x4 = vmin_s16(res2_16x4, pos_255_16x4); + res3_16x4 = vmin_s16(res3_16x4, pos_255_16x4); + + /* Perform Forward transform */ + 
/*-------------------------------------------------------------*/ + /* DCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + /* Matrix transpose */ + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + + xx0_16x4x2 = vtrn_s16(res0_16x4, res1_16x4); + xx1_16x4x2 = vtrn_s16(res2_16x4, res3_16x4); + x0_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + xx0_16x4 = vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx3_16x4, 1); + x1_16x4 = vadd_s16(xx2_16x4, temp0_16x4); + + x2_16x4 = vsub_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx2_16x4, 1); + x3_16x4 = vsub_s16(xx3_16x4, temp0_16x4); + + /* Matrix transpose */ + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + + xx0_16x4x2 = vtrn_s16(x0_16x4, x1_16x4); + xx1_16x4x2 = vtrn_s16(x2_16x4, x3_16x4); + x0_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + /* Vertical Transformation */ + + xx0_16x4 = vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + 
xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx3_16x4, 1); + x1_16x4 = vadd_s16(temp0_16x4, xx2_16x4); + + x2_16x4 = vsub_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx2_16x4, 1); + x3_16x4 = vsub_s16(xx3_16x4, temp0_16x4); + + /* get the first 16 bits from the register */ + *pi2_dc_out = vget_lane_s16(x0_16x4, 0); + + xx0_16x4 = vabs_s16(x0_16x4); + xx1_16x4 = vabs_s16(x1_16x4); + xx2_16x4 = vabs_s16(x2_16x4); + xx3_16x4 = vabs_s16(x3_16x4); + + /* compare with zero for getting sign */ + temp0_u16x4 = vcgt_s16(x0_16x4, zeros_16x4); + temp1_u16x4 = vcgt_s16(x1_16x4, zeros_16x4); + temp2_u16x4 = vcgt_s16(x2_16x4, zeros_16x4); + temp3_u16x4 = vcgt_s16(x3_16x4, zeros_16x4); + + /* compare with zero for thresholding */ + thresholdmask0_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold0_16x4), xx0_16x4); + thresholdmask1_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold1_16x4), xx1_16x4); + thresholdmask2_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold2_16x4), xx2_16x4); + thresholdmask3_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold3_16x4), xx3_16x4); + + /* Multiply abs values obtained with scaling matrix */ + tx0_32x4 = vmull_s16(xx0_16x4, vreinterpret_s16_u16(scale_mat0_16x4)); + tx1_32x4 = vmull_s16(xx1_16x4, vreinterpret_s16_u16(scale_mat1_16x4)); + tx2_32x4 = vmull_s16(xx2_16x4, vreinterpret_s16_u16(scale_mat2_16x4)); + tx3_32x4 = vmull_s16(xx3_16x4, vreinterpret_s16_u16(scale_mat3_16x4)); + + tx0_32x4 = vaddq_s32(tx0_32x4, rnd_factor_32x4); + tx1_32x4 = vaddq_s32(tx1_32x4, rnd_factor_32x4); + tx2_32x4 = vaddq_s32(tx2_32x4, rnd_factor_32x4); + tx3_32x4 = vaddq_s32(tx3_32x4, rnd_factor_32x4); + + qbits_32x4 = vnegq_s32(qbits_32x4); + + tx0_32x4 = vshlq_s32(tx0_32x4, qbits_32x4); + tx1_32x4 = vshlq_s32(tx1_32x4, qbits_32x4); + tx2_32x4 = vshlq_s32(tx2_32x4, qbits_32x4); + tx3_32x4 = vshlq_s32(tx3_32x4, qbits_32x4); + + /* Convertion to 16 bits signed */ + temp0_16x4 = vmovn_s32(tx0_32x4); + temp1_16x4 
= vmovn_s32(tx1_32x4); + temp2_16x4 = vmovn_s32(tx2_32x4); + temp3_16x4 = vmovn_s32(tx3_32x4); + + x0_16x4 = vneg_s16(temp0_16x4); + x1_16x4 = vneg_s16(temp1_16x4); + x2_16x4 = vneg_s16(temp2_16x4); + x3_16x4 = vneg_s16(temp3_16x4); + + /* Restore sign */ + x0_16x4 = vbsl_s16(temp0_u16x4, temp0_16x4, x0_16x4); + x1_16x4 = vbsl_s16(temp1_u16x4, temp1_16x4, x1_16x4); + x2_16x4 = vbsl_s16(temp2_u16x4, temp2_16x4, x2_16x4); + x3_16x4 = vbsl_s16(temp3_u16x4, temp3_16x4, x3_16x4); + + xx0_16x4 = vbsl_s16(thresholdmask0_16x4, zeros_16x4, x0_16x4); + xx1_16x4 = vbsl_s16(thresholdmask1_16x4, zeros_16x4, x1_16x4); + xx2_16x4 = vbsl_s16(thresholdmask2_16x4, zeros_16x4, x2_16x4); + xx3_16x4 = vbsl_s16(thresholdmask3_16x4, zeros_16x4, x3_16x4); + + /* Store Quantized outputs */ + vst1_s16(&pi2_out[0 * i4_out_stride], xx0_16x4); + vst1_s16(&pi2_out[1 * i4_out_stride], xx1_16x4); + vst1_s16(&pi2_out[2 * i4_out_stride], xx2_16x4); + vst1_s16(&pi2_out[3 * i4_out_stride], xx3_16x4); + + /* NNZ calculation */ + + temp0_u16x4 = vceq_s16(xx0_16x4, zeros_16x4); + temp1_u16x4 = vceq_s16(xx1_16x4, zeros_16x4); + temp2_u16x4 = vceq_s16(xx2_16x4, zeros_16x4); + temp3_u16x4 = vceq_s16(xx3_16x4, zeros_16x4); + + temp0_u16x8 = vcombine_u16(temp0_u16x4, temp2_u16x4); + temp1_u16x8 = vcombine_u16(temp1_u16x4, temp3_u16x4); + + /* Convertion to 8 bit unsigned */ + temp0_u8x8 = vmovn_u16(temp0_u16x8); + temp1_u8x8 = vmovn_u16(temp1_u16x8); + + temp0_u8x8 = vshr_n_u8(temp0_u8x8, 7); + temp1_u8x8 = vshr_n_u8(temp1_u8x8, 7); + + temp0_u8x8 = vadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + + *pu1_nnz = 16 - vget_lane_u8(temp0_u8x8, 0); +} + /* isvc_resi_trans_quant_chroma_4x4_neon: forms the 4x4 residue (src - pred) from the even-indexed bytes of four 8-byte source and prediction rows, applies the horizontal and vertical stages of the 4x4 forward transform, and quantizes each coefficient as (abs(coeff) * scale + round_factor) >> qbits with the sign restored afterwards. Coefficients whose magnitude is below the matching threshold matrix entry are forced to zero. Outputs: four rows of quantized coefficients at ps_out (row pitch i4_data_stride, counted in WORD16 elements), the unquantized DC coefficient at pi2_dc_out, and the number of non-zero quantized coefficients at pu1_nnz. ps_upsampled_res and u1_use_upsampled_res are not used. */ +void isvc_resi_trans_quant_chroma_4x4_neon(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_out, + buffer_container_t *ps_upsampled_res, + resi_trans_quant_constants_t *ps_quant_constants, + UWORD8
*pu1_nnz, WORD16 *pi2_dc_out, + UWORD8 u1_use_upsampled_res) +{ + UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + WORD16 *pi2_out = (WORD16 *) ps_out->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_out->i4_data_stride; + const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + + uint8x8_t src0, src1, src2, src3; + uint8x8_t pred0, pred1, pred2, pred3; + uint8x8x2_t tmp0, tmp1, tmp2, tmp3; + uint8x8_t temp0_u8x8, temp1_u8x8; + uint16x4_t temp0_u16x4, temp1_u16x4, temp2_u16x4, temp3_u16x4; + uint16x4_t scale_mat0_16x4, scale_mat1_16x4, scale_mat2_16x4, scale_mat3_16x4; + uint16x4_t threshold0_16x4, threshold1_16x4, threshold2_16x4, threshold3_16x4; + uint16x4_t thresholdmask0_16x4, thresholdmask1_16x4, thresholdmask2_16x4, thresholdmask3_16x4; + int16x4_t res0_16x4, res1_16x4, res2_16x4, res3_16x4; + int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4; + int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4; + int16x4x2_t xx0_16x4x2, xx1_16x4x2; + int16x4_t temp0_16x4, temp1_16x4, temp2_16x4, temp3_16x4; + uint16x8_t res0_16x8, res1_16x8, res2_16x8, res3_16x8; + uint16x8_t temp0_u16x8, temp1_u16x8; + int32x2x2_t x0_32x2x2, x1_32x2x2; + int32x4_t tx0_32x4, tx1_32x4, tx2_32x4, tx3_32x4; + + int32x4_t rnd_factor_32x4 = vdupq_n_s32(u4_round_factor); + int32x4_t qbits_32x4 = vdupq_n_s32(u4_qbits); + int16x4_t zeros_16x4 = vdup_n_s16(0); + + UNUSED(ps_upsampled_res); + UNUSED(u1_use_upsampled_res); + + threshold0_16x4 = vld1_u16(pu2_threshold_matrix); + threshold1_16x4 = vld1_u16(pu2_threshold_matrix + 4); + threshold2_16x4 = vld1_u16(pu2_threshold_matrix + 8); + threshold3_16x4 = vld1_u16(pu2_threshold_matrix + 12); + + scale_mat0_16x4
= vld1_u16(pu2_scale_matrix); + scale_mat1_16x4 = vld1_u16(pu2_scale_matrix + 4); + scale_mat2_16x4 = vld1_u16(pu2_scale_matrix + 8); + scale_mat3_16x4 = vld1_u16(pu2_scale_matrix + 12); + + src0 = vld1_u8(&pu1_src[0 * i4_src_stride]); + src1 = vld1_u8(&pu1_src[1 * i4_src_stride]); + src2 = vld1_u8(&pu1_src[2 * i4_src_stride]); + src3 = vld1_u8(&pu1_src[3 * i4_src_stride]); + + /* deinterleaving source buffer: val[0] of vuzp keeps the even-indexed bytes of each 8-byte row */ + tmp0 = vuzp_u8(src0, src0); + tmp1 = vuzp_u8(src1, src1); + tmp2 = vuzp_u8(src2, src2); + tmp3 = vuzp_u8(src3, src3); + + src0 = tmp0.val[0]; + src1 = tmp1.val[0]; + src2 = tmp2.val[0]; + src3 = tmp3.val[0]; + + pred0 = vld1_u8(&pu1_pred[0 * i4_pred_stride]); + pred1 = vld1_u8(&pu1_pred[1 * i4_pred_stride]); + pred2 = vld1_u8(&pu1_pred[2 * i4_pred_stride]); + pred3 = vld1_u8(&pu1_pred[3 * i4_pred_stride]); + + /* deinterleaving pred buffer: val[0] of vuzp keeps the even-indexed bytes of each 8-byte row */ + tmp0 = vuzp_u8(pred0, pred0); + tmp1 = vuzp_u8(pred1, pred1); + tmp2 = vuzp_u8(pred2, pred2); + tmp3 = vuzp_u8(pred3, pred3); + + pred0 = tmp0.val[0]; + pred1 = tmp1.val[0]; + pred2 = tmp2.val[0]; + pred3 = tmp3.val[0]; + + /* calculate res = src - pred; only the low four lanes of each row are used below */ + res0_16x8 = vsubl_u8(src0, pred0); + res1_16x8 = vsubl_u8(src1, pred1); + res2_16x8 = vsubl_u8(src2, pred2); + res3_16x8 = vsubl_u8(src3, pred3); + + res0_16x4 = vreinterpret_s16_u16(vget_low_u16(res0_16x8)); + res1_16x4 = vreinterpret_s16_u16(vget_low_u16(res1_16x8)); + res2_16x4 = vreinterpret_s16_u16(vget_low_u16(res2_16x8)); + res3_16x4 = vreinterpret_s16_u16(vget_low_u16(res3_16x8)); + + /* Perform Forward transform */ + /*-------------------------------------------------------------*/ + /* DCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + /* Matrix transpose */ + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + + xx0_16x4x2 = vtrn_s16(res0_16x4, res1_16x4); + xx1_16x4x2 = vtrn_s16(res2_16x4, res3_16x4); + x0_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]),
vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + xx0_16x4 = vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx3_16x4, 1); + x1_16x4 = vadd_s16(xx2_16x4, temp0_16x4); + + x2_16x4 = vsub_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx2_16x4, 1); + x3_16x4 = vsub_s16(xx3_16x4, temp0_16x4); + + /* Matrix transpose */ + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + + xx0_16x4x2 = vtrn_s16(x0_16x4, x1_16x4); + xx1_16x4x2 = vtrn_s16(x2_16x4, x3_16x4); + x0_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + /* Vertical Transformation */ + + xx0_16x4 = vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx3_16x4, 1); + x1_16x4 = vadd_s16(temp0_16x4, xx2_16x4); + + x2_16x4 = vsub_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx2_16x4, 1); + x3_16x4 = vsub_s16(xx3_16x4, temp0_16x4); + + /* get the first 16 bits from the register: the DC coefficient before quantization */ + *pi2_dc_out = vget_lane_s16(x0_16x4, 0); + + xx0_16x4 = vabs_s16(x0_16x4); + xx1_16x4 =
vabs_s16(x1_16x4); + xx2_16x4 = vabs_s16(x2_16x4); + xx3_16x4 = vabs_s16(x3_16x4); + + /* compare with zero for getting sign */ + temp0_u16x4 = vcgt_s16(x0_16x4, zeros_16x4); + temp1_u16x4 = vcgt_s16(x1_16x4, zeros_16x4); + temp2_u16x4 = vcgt_s16(x2_16x4, zeros_16x4); + temp3_u16x4 = vcgt_s16(x3_16x4, zeros_16x4); + + /* compare abs values with the threshold matrix; mask lanes are set where threshold > abs(coeff) */ + thresholdmask0_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold0_16x4), xx0_16x4); + thresholdmask1_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold1_16x4), xx1_16x4); + thresholdmask2_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold2_16x4), xx2_16x4); + thresholdmask3_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold3_16x4), xx3_16x4); + + /* Multiply abs values obtained with scaling matrix */ + tx0_32x4 = vmull_s16(xx0_16x4, vreinterpret_s16_u16(scale_mat0_16x4)); + tx1_32x4 = vmull_s16(xx1_16x4, vreinterpret_s16_u16(scale_mat1_16x4)); + tx2_32x4 = vmull_s16(xx2_16x4, vreinterpret_s16_u16(scale_mat2_16x4)); + tx3_32x4 = vmull_s16(xx3_16x4, vreinterpret_s16_u16(scale_mat3_16x4)); + + tx0_32x4 = vaddq_s32(tx0_32x4, rnd_factor_32x4); + tx1_32x4 = vaddq_s32(tx1_32x4, rnd_factor_32x4); + tx2_32x4 = vaddq_s32(tx2_32x4, rnd_factor_32x4); + tx3_32x4 = vaddq_s32(tx3_32x4, rnd_factor_32x4); + /* vshlq_s32 shifts right when the per-lane shift count is negative, so negate qbits first */ + qbits_32x4 = vnegq_s32(qbits_32x4); + + tx0_32x4 = vshlq_s32(tx0_32x4, qbits_32x4); + tx1_32x4 = vshlq_s32(tx1_32x4, qbits_32x4); + tx2_32x4 = vshlq_s32(tx2_32x4, qbits_32x4); + tx3_32x4 = vshlq_s32(tx3_32x4, qbits_32x4); + + /* Conversion to 16 bits signed */ + temp0_16x4 = vmovn_s32(tx0_32x4); + temp1_16x4 = vmovn_s32(tx1_32x4); + temp2_16x4 = vmovn_s32(tx2_32x4); + temp3_16x4 = vmovn_s32(tx3_32x4); + + x0_16x4 = vneg_s16(temp0_16x4); + x1_16x4 = vneg_s16(temp1_16x4); + x2_16x4 = vneg_s16(temp2_16x4); + x3_16x4 = vneg_s16(temp3_16x4); + + /* Restore sign: keep the positive level where the coefficient was > 0, else the negated level */ + x0_16x4 = vbsl_s16(temp0_u16x4, temp0_16x4, x0_16x4); + x1_16x4 = vbsl_s16(temp1_u16x4, temp1_16x4, x1_16x4); + x2_16x4 = vbsl_s16(temp2_u16x4, temp2_16x4, x2_16x4); + x3_16x4 =
vbsl_s16(temp3_u16x4, temp3_16x4, x3_16x4); + + /* Thresholding */ + xx0_16x4 = vbsl_s16(thresholdmask0_16x4, zeros_16x4, x0_16x4); + xx1_16x4 = vbsl_s16(thresholdmask1_16x4, zeros_16x4, x1_16x4); + xx2_16x4 = vbsl_s16(thresholdmask2_16x4, zeros_16x4, x2_16x4); + xx3_16x4 = vbsl_s16(thresholdmask3_16x4, zeros_16x4, x3_16x4); + + /* Store Quantized outputs */ + vst1_s16(&pi2_out[0 * i4_out_stride], xx0_16x4); + vst1_s16(&pi2_out[1 * i4_out_stride], xx1_16x4); + vst1_s16(&pi2_out[2 * i4_out_stride], xx2_16x4); + vst1_s16(&pi2_out[3 * i4_out_stride], xx3_16x4); + + /* NNZ calculation */ + /* vceq sets all bits of a lane where the coefficient is zero; after narrowing, >> 7 turns each such byte into 1, and the adds below total the zero coefficients into lane 0 */ + temp0_u16x4 = vceq_s16(xx0_16x4, zeros_16x4); + temp1_u16x4 = vceq_s16(xx1_16x4, zeros_16x4); + temp2_u16x4 = vceq_s16(xx2_16x4, zeros_16x4); + temp3_u16x4 = vceq_s16(xx3_16x4, zeros_16x4); + + temp0_u16x8 = vcombine_u16(temp0_u16x4, temp2_u16x4); + temp1_u16x8 = vcombine_u16(temp1_u16x4, temp3_u16x4); + + /* Conversion to 8 bit unsigned */ + temp0_u8x8 = vmovn_u16(temp0_u16x8); + temp1_u8x8 = vmovn_u16(temp1_u16x8); + + temp0_u8x8 = vshr_n_u8(temp0_u8x8, 7); + temp1_u8x8 = vshr_n_u8(temp1_u8x8, 7); + + temp0_u8x8 = vadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + + *pu1_nnz = 16 - vget_lane_u8(temp0_u8x8, 0); +} + /* isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon: forms the 4x4 residue (src - pred) from the even-indexed bytes of four 8-byte source and prediction rows, subtracts the upsampled residual from it, clamps the result to [-255, 255], then applies the 4x4 forward transform, quantization ((abs(coeff) * scale + round_factor) >> qbits, sign restored) and thresholding. Outputs: four rows of quantized coefficients at ps_out, the unquantized DC coefficient at pi2_dc_out, and the number of non-zero quantized coefficients at pu1_nnz. u1_use_upsampled_res is not used. NOTE(review): pi2_upsampled_res becomes NULL when ps_upsampled_res is NULL, yet it is loaded unconditionally further down - callers presumably always pass a valid buffer to this variant; confirm. */ +void isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_out, + buffer_container_t *ps_upsampled_res, resi_trans_quant_constants_t *ps_quant_constants, + UWORD8 *pu1_nnz, WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res) +{ + UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + WORD16 *pi2_out = (WORD16 *) ps_out->pv_data; + WORD16 *pi2_upsampled_res = ps_upsampled_res ?
(WORD16 *) ps_upsampled_res->pv_data : NULL; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_out->i4_data_stride; + WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0; + const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + + uint8x8_t src0, src1, src2, src3; + uint8x8_t pred0, pred1, pred2, pred3; + uint8x8x2_t tmp0, tmp1, tmp2, tmp3; + uint8x8_t temp0_u8x8, temp1_u8x8; + uint16x4_t temp0_u16x4, temp1_u16x4, temp2_u16x4, temp3_u16x4; + uint16x4_t scale_mat0_16x4, scale_mat1_16x4, scale_mat2_16x4, scale_mat3_16x4; + uint16x4_t threshold0_16x4, threshold1_16x4, threshold2_16x4, threshold3_16x4; + uint16x4_t thresholdmask0_16x4, thresholdmask1_16x4, thresholdmask2_16x4, thresholdmask3_16x4; + int16x4_t upres0_16x4, upres1_16x4, upres2_16x4, upres3_16x4; + int16x4_t res0_16x4, res1_16x4, res2_16x4, res3_16x4; + int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4; + int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4; + int16x4x2_t xx0_16x4x2, xx1_16x4x2; + int16x4_t temp0_16x4, temp1_16x4, temp2_16x4, temp3_16x4; + uint16x8_t res0_16x8, res1_16x8, res2_16x8, res3_16x8; + uint16x8_t temp0_u16x8, temp1_u16x8; + int32x2x2_t x0_32x2x2, x1_32x2x2; + int32x4_t tx0_32x4, tx1_32x4, tx2_32x4, tx3_32x4; + + int32x4_t rnd_factor_32x4 = vdupq_n_s32(u4_round_factor); + int32x4_t qbits_32x4 = vdupq_n_s32(u4_qbits); + int16x4_t zeros_16x4 = vdup_n_s16(0); + int16x4_t pos_255_16x4 = vdup_n_s16(((WORD16) UINT8_MAX)); + int16x4_t neg_255_16x4 = vdup_n_s16(-((WORD16) UINT8_MAX)); + + UNUSED(u1_use_upsampled_res); + + threshold0_16x4 = vld1_u16(pu2_threshold_matrix); + threshold1_16x4 = vld1_u16(pu2_threshold_matrix + 4); + threshold2_16x4 =
vld1_u16(pu2_threshold_matrix + 8); + threshold3_16x4 = vld1_u16(pu2_threshold_matrix + 12); + + scale_mat0_16x4 = vld1_u16(pu2_scale_matrix); + scale_mat1_16x4 = vld1_u16(pu2_scale_matrix + 4); + scale_mat2_16x4 = vld1_u16(pu2_scale_matrix + 8); + scale_mat3_16x4 = vld1_u16(pu2_scale_matrix + 12); + + src0 = vld1_u8(&pu1_src[0 * i4_src_stride]); + src1 = vld1_u8(&pu1_src[1 * i4_src_stride]); + src2 = vld1_u8(&pu1_src[2 * i4_src_stride]); + src3 = vld1_u8(&pu1_src[3 * i4_src_stride]); + + /* deinterleaving source buffer: val[0] of vuzp keeps the even-indexed bytes of each 8-byte row */ + tmp0 = vuzp_u8(src0, src0); + tmp1 = vuzp_u8(src1, src1); + tmp2 = vuzp_u8(src2, src2); + tmp3 = vuzp_u8(src3, src3); + + src0 = tmp0.val[0]; + src1 = tmp1.val[0]; + src2 = tmp2.val[0]; + src3 = tmp3.val[0]; + + pred0 = vld1_u8(&pu1_pred[0 * i4_pred_stride]); + pred1 = vld1_u8(&pu1_pred[1 * i4_pred_stride]); + pred2 = vld1_u8(&pu1_pred[2 * i4_pred_stride]); + pred3 = vld1_u8(&pu1_pred[3 * i4_pred_stride]); + + /* deinterleaving pred buffer: val[0] of vuzp keeps the even-indexed bytes of each 8-byte row */ + tmp0 = vuzp_u8(pred0, pred0); + tmp1 = vuzp_u8(pred1, pred1); + tmp2 = vuzp_u8(pred2, pred2); + tmp3 = vuzp_u8(pred3, pred3); + + pred0 = tmp0.val[0]; + pred1 = tmp1.val[0]; + pred2 = tmp2.val[0]; + pred3 = tmp3.val[0]; + + /* calculate res = src - pred; only the low four lanes of each row are used below */ + res0_16x8 = vsubl_u8(src0, pred0); + res1_16x8 = vsubl_u8(src1, pred1); + res2_16x8 = vsubl_u8(src2, pred2); + res3_16x8 = vsubl_u8(src3, pred3); + + res0_16x4 = vreinterpret_s16_u16(vget_low_u16(res0_16x8)); + res1_16x4 = vreinterpret_s16_u16(vget_low_u16(res1_16x8)); + res2_16x4 = vreinterpret_s16_u16(vget_low_u16(res2_16x8)); + res3_16x4 = vreinterpret_s16_u16(vget_low_u16(res3_16x8)); + + /* Load upsampled res. NOTE(review): four consecutive WORD16 values are read per row, i.e. without the de-interleaving applied to src and pred - confirm this matches the upsampled residual buffer layout */ + upres0_16x4 = vld1_s16(&pi2_upsampled_res[0 * i4_upsampled_res_stride]); + upres1_16x4 = vld1_s16(&pi2_upsampled_res[1 * i4_upsampled_res_stride]); + upres2_16x4 = vld1_s16(&pi2_upsampled_res[2 * i4_upsampled_res_stride]); + upres3_16x4 = vld1_s16(&pi2_upsampled_res[3 * i4_upsampled_res_stride]); + + /* subtract upsampled res from (src
- pred) to obtain final res */ + res0_16x4 = vsub_s16(res0_16x4, upres0_16x4); + res1_16x4 = vsub_s16(res1_16x4, upres1_16x4); + res2_16x4 = vsub_s16(res2_16x4, upres2_16x4); + res3_16x4 = vsub_s16(res3_16x4, upres3_16x4); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + res0_16x4 = vmax_s16(res0_16x4, neg_255_16x4); + res1_16x4 = vmax_s16(res1_16x4, neg_255_16x4); + res2_16x4 = vmax_s16(res2_16x4, neg_255_16x4); + res3_16x4 = vmax_s16(res3_16x4, neg_255_16x4); + + /* Saturate all values > 255 to 255 and retain the rest as it is */ + res0_16x4 = vmin_s16(res0_16x4, pos_255_16x4); + res1_16x4 = vmin_s16(res1_16x4, pos_255_16x4); + res2_16x4 = vmin_s16(res2_16x4, pos_255_16x4); + res3_16x4 = vmin_s16(res3_16x4, pos_255_16x4); + + /* Perform Forward transform */ + /*-------------------------------------------------------------*/ + /* DCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + /* Matrix transpose */ + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + + xx0_16x4x2 = vtrn_s16(res0_16x4, res1_16x4); + xx1_16x4x2 = vtrn_s16(res2_16x4, res3_16x4); + x0_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + xx0_16x4 = vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx3_16x4, 1); + x1_16x4 = vadd_s16(xx2_16x4, temp0_16x4); + + x2_16x4 = vsub_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx2_16x4, 1); + x3_16x4 =
vsub_s16(xx3_16x4, temp0_16x4); + + /* Matrix transpose */ + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + + xx0_16x4x2 = vtrn_s16(x0_16x4, x1_16x4); + xx1_16x4x2 = vtrn_s16(x2_16x4, x3_16x4); + x0_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0])); + x1_32x2x2 = + vtrn_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1])); + + x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]); + x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]); + x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]); + x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]); + + /* Vertical Transformation */ + + xx0_16x4 = vadd_s16(x0_16x4, x3_16x4); + xx1_16x4 = vadd_s16(x1_16x4, x2_16x4); + xx2_16x4 = vsub_s16(x1_16x4, x2_16x4); + xx3_16x4 = vsub_s16(x0_16x4, x3_16x4); + + x0_16x4 = vadd_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx3_16x4, 1); + x1_16x4 = vadd_s16(temp0_16x4, xx2_16x4); + + x2_16x4 = vsub_s16(xx0_16x4, xx1_16x4); + temp0_16x4 = vshl_n_s16(xx2_16x4, 1); + x3_16x4 = vsub_s16(xx3_16x4, temp0_16x4); + + /* get the first 16 bits from the register: the DC coefficient before quantization */ + *pi2_dc_out = vget_lane_s16(x0_16x4, 0); + + xx0_16x4 = vabs_s16(x0_16x4); + xx1_16x4 = vabs_s16(x1_16x4); + xx2_16x4 = vabs_s16(x2_16x4); + xx3_16x4 = vabs_s16(x3_16x4); + + /* compare with zero for getting sign */ + temp0_u16x4 = vcgt_s16(x0_16x4, zeros_16x4); + temp1_u16x4 = vcgt_s16(x1_16x4, zeros_16x4); + temp2_u16x4 = vcgt_s16(x2_16x4, zeros_16x4); + temp3_u16x4 = vcgt_s16(x3_16x4, zeros_16x4); + /* compare abs values with the threshold matrix; mask lanes are set where threshold > abs(coeff) */ + thresholdmask0_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold0_16x4), xx0_16x4); + thresholdmask1_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold1_16x4), xx1_16x4); + thresholdmask2_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold2_16x4), xx2_16x4); + thresholdmask3_16x4 = vcgt_s16(vreinterpret_s16_u16(threshold3_16x4), xx3_16x4); + + /* Multiply abs values obtained with scaling matrix */ + tx0_32x4 = vmull_s16(xx0_16x4,
vreinterpret_s16_u16(scale_mat0_16x4)); + tx1_32x4 = vmull_s16(xx1_16x4, vreinterpret_s16_u16(scale_mat1_16x4)); + tx2_32x4 = vmull_s16(xx2_16x4, vreinterpret_s16_u16(scale_mat2_16x4)); + tx3_32x4 = vmull_s16(xx3_16x4, vreinterpret_s16_u16(scale_mat3_16x4)); + + tx0_32x4 = vaddq_s32(tx0_32x4, rnd_factor_32x4); + tx1_32x4 = vaddq_s32(tx1_32x4, rnd_factor_32x4); + tx2_32x4 = vaddq_s32(tx2_32x4, rnd_factor_32x4); + tx3_32x4 = vaddq_s32(tx3_32x4, rnd_factor_32x4); + /* vshlq_s32 shifts right when the per-lane shift count is negative, so negate qbits first */ + qbits_32x4 = vnegq_s32(qbits_32x4); + + tx0_32x4 = vshlq_s32(tx0_32x4, qbits_32x4); + tx1_32x4 = vshlq_s32(tx1_32x4, qbits_32x4); + tx2_32x4 = vshlq_s32(tx2_32x4, qbits_32x4); + tx3_32x4 = vshlq_s32(tx3_32x4, qbits_32x4); + + /* Conversion to 16 bits signed */ + temp0_16x4 = vmovn_s32(tx0_32x4); + temp1_16x4 = vmovn_s32(tx1_32x4); + temp2_16x4 = vmovn_s32(tx2_32x4); + temp3_16x4 = vmovn_s32(tx3_32x4); + + x0_16x4 = vneg_s16(temp0_16x4); + x1_16x4 = vneg_s16(temp1_16x4); + x2_16x4 = vneg_s16(temp2_16x4); + x3_16x4 = vneg_s16(temp3_16x4); + + /* Restore sign: keep the positive level where the coefficient was > 0, else the negated level */ + x0_16x4 = vbsl_s16(temp0_u16x4, temp0_16x4, x0_16x4); + x1_16x4 = vbsl_s16(temp1_u16x4, temp1_16x4, x1_16x4); + x2_16x4 = vbsl_s16(temp2_u16x4, temp2_16x4, x2_16x4); + x3_16x4 = vbsl_s16(temp3_u16x4, temp3_16x4, x3_16x4); + /* Thresholding */ + xx0_16x4 = vbsl_s16(thresholdmask0_16x4, zeros_16x4, x0_16x4); + xx1_16x4 = vbsl_s16(thresholdmask1_16x4, zeros_16x4, x1_16x4); + xx2_16x4 = vbsl_s16(thresholdmask2_16x4, zeros_16x4, x2_16x4); + xx3_16x4 = vbsl_s16(thresholdmask3_16x4, zeros_16x4, x3_16x4); + + /* Store Quantized outputs */ + vst1_s16(&pi2_out[0 * i4_out_stride], xx0_16x4); + vst1_s16(&pi2_out[1 * i4_out_stride], xx1_16x4); + vst1_s16(&pi2_out[2 * i4_out_stride], xx2_16x4); + vst1_s16(&pi2_out[3 * i4_out_stride], xx3_16x4); + + /* NNZ calculation */ + /* vceq sets all bits of a lane where the coefficient is zero; after narrowing, >> 7 turns each such byte into 1, and the adds below total the zero coefficients into lane 0 */ + temp0_u16x4 = vceq_s16(xx0_16x4, zeros_16x4); + temp1_u16x4 = vceq_s16(xx1_16x4, zeros_16x4); + temp2_u16x4 = vceq_s16(xx2_16x4, zeros_16x4); + temp3_u16x4 = vceq_s16(xx3_16x4, zeros_16x4); + + temp0_u16x8 =
vcombine_u16(temp0_u16x4, temp2_u16x4); + temp1_u16x8 = vcombine_u16(temp1_u16x4, temp3_u16x4); + + /* Conversion to 8 bit unsigned */ + temp0_u8x8 = vmovn_u16(temp0_u16x8); + temp1_u8x8 = vmovn_u16(temp1_u16x8); + + temp0_u8x8 = vshr_n_u8(temp0_u8x8, 7); + temp1_u8x8 = vshr_n_u8(temp1_u8x8, 7); + + temp0_u8x8 = vadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + temp0_u8x8 = vpadd_u8(temp0_u8x8, temp1_u8x8); + + *pu1_nnz = 16 - vget_lane_u8(temp0_u8x8, 0); +} diff --git a/common/ih264_cabac_tables.h b/common/ih264_cabac_tables.h index dd2fd35..5cddc45 100644 --- a/common/ih264_cabac_tables.h +++ b/common/ih264_cabac_tables.h @@ -141,11 +141,16 @@ typedef enum LAST_SIGNIFICANT_COEFF_FLAG_8X8_FRAME = 417, COEFF_ABS_LEVEL_MINUS1_8X8 = 426, SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 436, - LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 451 + LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 451, + + /* SVC related CABAC offsets */ + BASE_MODE_FLAG = 460, + MOTION_PREDICTION_FLAG_L0 = 463, + MOTION_PREDICTION_FLAG_L1 = 464, + RESIDUAL_PREDICTION_FLAG = 465, } cabac_table_num_t; - /** ****************************************************************************** * @enum ctxIdxOffset diff --git a/common/ih264_defs.h b/common/ih264_defs.h index d9fea26..e3c41d0 100644 --- a/common/ih264_defs.h +++ b/common/ih264_defs.h @@ -135,6 +135,9 @@ enum ISLICE = 2, SPSLICE = 3, SISLICE = 4, + EPSLICE = 5, + EBSLICE = 6, + EISLICE = 7, MAXSLICE_TYPE, }; @@ -144,27 +147,28 @@ enum * @brief Defines the set of possible nal unit types ****************************************************************************** */ -enum +typedef enum NAL_UNIT_TYPE_T { - NAL_UNSPEC_0 = 0, - NAL_SLICE_NON_IDR = 1, - NAL_SLICE_DPA = 2, - NAL_SLICE_DPB = 3, - NAL_SLICE_DPC = 4, - NAL_SLICE_IDR = 5, - NAL_SEI = 6, - NAL_SPS = 7, - NAL_PPS = 8, - NAL_AUD = 9, - NAL_EOSEQ = 10, - NAL_EOSTR = 11, - NAL_FILLER = 12, - NAL_SPSE = 13, - NAL_RES_18 = 14, -
NAL_AUX_PIC = 19, - NAL_RES_23 = 20, - NAL_UNSPEC_31 = 24, -}; + NAL_UNSPEC_0 = 0, + NAL_SLICE_NON_IDR = 1, + NAL_SLICE_DPA = 2, + NAL_SLICE_DPB = 3, + NAL_SLICE_DPC = 4, + NAL_SLICE_IDR = 5, + NAL_SEI = 6, + NAL_SPS = 7, + NAL_PPS = 8, + NAL_AUD = 9, + NAL_EOSEQ = 10, + NAL_EOSTR = 11, + NAL_FILLER = 12, + NAL_SPSE = 13, + NAL_PREFIX = 14, + NAL_SUBSET_SPS = 15, + NAL_AUX_PIC = 19, + NAL_CODED_SLICE_EXTENSION = 20, + NAL_UNSPEC_31 = 24, +} NAL_UNIT_TYPE_T; /** ****************************************************************************** @@ -261,27 +265,29 @@ typedef enum */ typedef enum { - I16x16 = 0, - I4x4 = 1, - I8x8 = 2, - P16x16 = 3, - P16x8 = 4, - P8x16 = 5, - P8x8 = 6, - PSKIP = 7, - IPCM = 8, - B16x16 = 9, - BSKIP = 10, - BDIRECT = 11, + INVALID_MB_TYPE = -1, + I16x16 = 0, + I4x4 = 1, + I8x8 = 2, + P16x16 = 3, + P16x8 = 4, + P8x16 = 5, + P8x8 = 6, + PSKIP = 7, + IPCM = 8, + B16x16 = 9, + BSKIP = 10, + BDIRECT = 11, + BASE_MODE = 12, MAX_MBTYPES, -}MBTYPES_T; +} MBTYPES_T; /* Pred Modes */ enum { BLOCK_TYPE_INTER_MB = 0, BLOCK_TYPE_INTRA_MB = 1, - BLOCK_TYPE_SKIP_MB = 2 + BLOCK_TYPE_SKIP_MB = 2 }; /* Prediction list */ @@ -521,9 +527,16 @@ typedef enum /* Number of max TU in a MB row */ #define MAX_TU_IN_MB_ROW ((MB_SIZE / MIN_TU_SIZE)) +#define MIN_TU_IN_MB_ROW ((MB_SIZE / MAX_TU_SIZE)) + /* Number of max PU in a CTb row */ #define MAX_PU_IN_MB_ROW ((MB_SIZE / MIN_PU_SIZE)) +#define MAX_TU_IN_MB_COL MAX_TU_IN_MB_ROW + +#define MIN_TU_IN_MB_COL MIN_TU_IN_MB_ROW + +#define MAX_PU_IN_MB_COL MAX_PU_IN_MB_ROW /* Number of max PU in a MB */ /*****************************************************************************/ @@ -537,7 +550,11 @@ typedef enum #define MAX_TU_IN_MB ((MB_SIZE / MIN_TU_SIZE) * \ (MB_SIZE / MIN_TU_SIZE)) +#define MIN_TU_IN_MB (MIN_TU_IN_MB_ROW * MIN_TU_IN_MB_COL) +#define NUM_4x4_IN_8x8 4 + +#define NUM_COEFFS_IN_MIN_TU (MIN_TU_SIZE * MIN_TU_SIZE) /** * Maximum transform depths diff --git a/common/ih264_size_defs.h 
b/common/ih264_size_defs.h index e2a8b76..4555647 100644 --- a/common/ih264_size_defs.h +++ b/common/ih264_size_defs.h @@ -44,6 +44,8 @@ /*Width of a 4x4 block*/ #define SUB_BLK_WIDTH_4x4 4 +#define SUB_BLK_HEIGHT_4x4 4 + /*Width of an 8x8 block*/ #define SUB_BLK_WIDTH_8x8 8 diff --git a/common/svc/isvc_cabac_tables.c b/common/svc/isvc_cabac_tables.c new file mode 100644 index 0000000..6893fc2 --- /dev/null +++ b/common/svc/isvc_cabac_tables.c @@ -0,0 +1,6542 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file +* isvc_cabac_tables.c +* +* @brief +* This file contains H264 cabac tables for init contexts, rlps and +* cabac state transitions +* +* @author +* Ittiam +* +* @par List of Tables +* - gau4_isvc_cabac_table[][] +* - gau1_isvc_cabac_ctxt_init_table[][][] +* +****************************************************************************** +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "isvc_cabac_tables.h" + +/*****************************************************************************/ +/* CABAC TABLES */ +/*****************************************************************************/ +/*combined table :guc_RTAB,NextStateLPS,NextStateMPS + input(combined_state): + bits 0-5: state + bits 6:mps + output + bits 0-7:rangeTabLPS + bits 8-14 :combined_next_state_if_mps + bits 15 -21:combined_next_state_if_lps + + */ +const UWORD32 (*gau4_isvc_cabac_table)[4] = gau4_ih264_cabac_table; + +/*****************************************************************************/ +/* Global Variable Initialization */ +/*****************************************************************************/ +/* This table has been derived using equation 9.5 and table 9.11 from the */ +/* spec. 
The formulae have been reproduced below - */ +/* + preCtxState = Clip3( 1, 126, ( ( m * Clip3( 0, 51, SliceQPY ) ) >> 4 ) + n ) + if( preCtxState <= 63 ) { + pStateIdx = 63 - preCtxState + valMPS = 0 + } else { + pStateIdx = preCtxState - 64 + valMPS = 1 + } + gau1_isvc_cabac_ctxt_init_table[I|P][SliceQPY][Idx] = (pStateIdx + (valMPS << 6)); +*/ +/* 'm', and 'n' are listed for each index in table 9.11 */ +const UWORD8 + gau1_isvc_cabac_ctxt_init_table[NUM_CAB_INIT_IDC_PLUS_ONE][QP_RANGE][NUM_SVC_CABAC_CTXTS] = + + { + + { + + { + + 62, 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 30, 61, 62, 54, 14, 118, + 6, 78, 65, 1, 14, 73, 13, 64, 20, 62, 67, 90, 104, 126, 104, 67, 78, + 65, 1, 86, 95, 2, 18, 69, 81, 96, 8, 67, 86, 88, 5, 76, 94, 9, + 69, 81, 88, 67, 74, 74, 80, 72, 5, 22, 0, 0, 0, 83, 86, 97, 72, + 22, 1, 18, 78, 96, 126, 98, 101, 67, 82, 94, 83, 110, 91, 102, 93, 126, + 92, 89, 96, 108, 17, 65, 6, 93, 74, 92, 87, 126, 9, 3, 4, 69, 15, + 68, 69, 88, 85, 78, 75, 77, 9, 13, 68, 13, 21, 81, 0, 70, 67, 6, + 76, 28, 64, 2, 28, 38, 39, 34, 27, 93, 73, 73, 17, 14, 100, 10, 10, + 10, 2, 7, 7, 0, 3, 1, 6, 69, 6, 24, 12, 68, 64, 2, 0, 13, + 24, 19, 11, 15, 3, 4, 4, 30, 19, 20, 78, 3, 69, 35, 23, 19, 14, + 17, 19, 12, 16, 24, 1, 17, 9, 9, 5, 0, 12, 6, 10, 11, 8, 18, + 27, 10, 82, 8, 78, 17, 32, 84, 56, 62, 60, 59, 62, 62, 57, 57, 54, + 44, 36, 33, 43, 29, 70, 67, 4, 67, 33, 31, 28, 34, 32, 25, 20, 22, + 0, 4, 64, 94, 89, 108, 76, 19, 18, 11, 64, 4, 70, 75, 82, 102, 77, + 39, 21, 15, 8, 4, 71, 83, 87, 119, 5, 34, 27, 25, 20, 8, 5, 64, + 74, 90, 70, 34, 32, 21, 4, 5, 72, 81, 97, 5, 58, 49, 45, 36, 23, + 5, 70, 79, 85, 62, 106, 106, 87, 114, 110, 98, 110, 106, 103, 107, 108, 112, + 96, 95, 91, 93, 94, 86, 67, 80, 85, 70, 3, 5, 2, 13, 13, 14, 9, + 22, 17, 12, 14, 11, 22, 16, 8, 22, 19, 13, 10, 14, 0, 64, 69, 4, + 70, 19, 32, 20, 10, 29, 25, 11, 23, 31, 19, 25, 13, 6, 20, 52, 49, + 52, 52, 54, 62, 62, 62, 62, 62, 62, 62, 62, 62, 34, 62, 62, 62, 62, + 62, 62, 54, 37, 36, 6, 82, 75,
97, 125, 62, 62, 62, 57, 55, 53, 41, + 44, 31, 32, 22, 19, 16, 65, 71, 3, 0, 65, 39, 43, 40, 31, 40, 39, + 23, 31, 34, 21, 6, 10, 2, 86, 23, 12, 4, 79, 71, 69, 70, 66, 68, + 73, 69, 70, 67, 1, 70, 66, 65, 0, 62, 62, 62, 62, 62, 60, 54, 36, + 4, 66, 28, 21, 18, 15, 7, 3, 1, 66, 76, 85, 81, 77, 81, 80, 73, + 74, 83, 71, 67, 2, 66, 66, 4, 4, 62, 62, 62, 62, 61, 57, 46, 29, + 1, 75, 65, 4, 67, 67, 104, 106}, + + { + + 62, 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 29, 60, 62, 54, 14, 115, + 6, 77, 64, 1, 14, 72, 12, 65, 20, 62, 68, 91, 104, 124, 102, 67, 77, + 64, 1, 85, 93, 3, 18, 68, 80, 95, 8, 67, 85, 88, 5, 75, 93, 9, + 69, 80, 88, 66, 73, 73, 79, 71, 5, 22, 0, 0, 0, 82, 86, 97, 71, + 22, 1, 18, 77, 95, 124, 96, 99, 65, 80, 92, 82, 108, 89, 100, 92, 125, + 91, 88, 95, 107, 18, 64, 7, 92, 73, 91, 86, 124, 9, 3, 4, 69, 16, + 68, 68, 87, 84, 77, 74, 76, 9, 13, 67, 13, 21, 80, 0, 69, 67, 6, + 75, 28, 64, 2, 28, 37, 39, 34, 27, 92, 72, 72, 17, 14, 99, 10, 10, + 10, 3, 7, 7, 1, 4, 2, 6, 68, 6, 24, 12, 68, 64, 2, 0, 13, + 23, 19, 11, 15, 4, 5, 4, 29, 19, 20, 77, 3, 69, 35, 23, 19, 14, + 17, 19, 12, 16, 24, 1, 17, 9, 9, 5, 0, 12, 6, 10, 11, 8, 18, + 27, 10, 81, 8, 77, 17, 31, 83, 55, 62, 59, 58, 61, 62, 56, 56, 52, + 43, 35, 32, 41, 28, 71, 67, 4, 67, 32, 30, 27, 33, 31, 24, 19, 21, + 0, 4, 64, 93, 88, 107, 75, 20, 18, 11, 0, 5, 69, 74, 81, 100, 76, + 39, 21, 15, 8, 5, 70, 82, 86, 117, 5, 35, 28, 25, 20, 9, 5, 64, + 73, 89, 70, 35, 32, 21, 4, 6, 71, 80, 96, 5, 58, 49, 45, 36, 23, + 5, 69, 78, 84, 62, 105, 105, 86, 112, 108, 97, 108, 104, 101, 105, 106, 110, + 95, 94, 90, 92, 92, 85, 67, 79, 84, 69, 3, 5, 2, 13, 13, 13, 8, + 22, 17, 13, 14, 11, 22, 16, 8, 22, 19, 13, 10, 14, 0, 64, 68, 5, + 70, 19, 32, 20, 10, 29, 25, 12, 23, 30, 19, 25, 13, 6, 19, 52, 49, + 52, 51, 53, 62, 62, 62, 62, 62, 62, 62, 62, 62, 33, 62, 62, 62, 62, + 62, 62, 53, 36, 35, 6, 81, 74, 95, 122, 62, 62, 62, 56, 53, 52, 40, + 42, 30, 31, 21, 18, 15, 66, 71, 3, 0, 66, 38, 42, 39, 30, 39, 38, + 22, 30, 33, 
20, 5, 9, 1, 86, 23, 12, 4, 78, 70, 68, 69, 65, 67, + 71, 68, 69, 66, 3, 68, 65, 0, 2, 62, 62, 62, 62, 62, 58, 51, 34, + 2, 65, 29, 22, 19, 16, 8, 4, 2, 65, 75, 84, 80, 76, 80, 78, 71, + 73, 82, 70, 66, 3, 65, 65, 4, 4, 62, 62, 62, 62, 58, 54, 43, 26, + 64, 75, 65, 4, 66, 66, 102, 103}, + + { + + 62, 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 28, 59, 61, 54, 14, 113, + 6, 76, 0, 1, 13, 72, 11, 66, 19, 60, 70, 92, 105, 121, 101, 67, 76, + 0, 1, 85, 92, 3, 17, 68, 80, 94, 8, 67, 85, 88, 5, 75, 92, 9, + 69, 80, 88, 66, 73, 73, 79, 71, 5, 22, 0, 0, 0, 81, 86, 97, 71, + 21, 1, 18, 77, 95, 122, 94, 97, 64, 78, 91, 81, 107, 88, 99, 91, 123, + 91, 88, 95, 106, 18, 64, 7, 91, 73, 90, 86, 123, 9, 3, 4, 69, 16, + 68, 68, 87, 84, 77, 74, 76, 9, 13, 67, 13, 21, 80, 0, 69, 67, 6, + 75, 27, 64, 2, 27, 36, 38, 33, 26, 91, 72, 72, 16, 13, 99, 9, 10, + 10, 3, 7, 7, 2, 4, 2, 6, 68, 6, 23, 12, 69, 64, 2, 64, 13, + 22, 19, 11, 14, 4, 5, 4, 28, 19, 19, 77, 3, 70, 34, 23, 19, 14, + 17, 19, 12, 16, 24, 1, 17, 9, 9, 5, 0, 12, 6, 10, 11, 8, 17, + 26, 9, 81, 8, 77, 16, 30, 83, 53, 62, 57, 56, 59, 60, 54, 54, 50, + 41, 33, 30, 39, 26, 72, 67, 4, 68, 31, 29, 26, 32, 29, 23, 18, 20, + 64, 3, 65, 93, 88, 106, 75, 20, 18, 11, 0, 5, 69, 74, 81, 99, 75, + 39, 21, 15, 8, 5, 70, 81, 85, 115, 5, 35, 28, 25, 20, 9, 5, 64, + 73, 88, 70, 35, 32, 21, 4, 6, 71, 80, 95, 5, 57, 48, 44, 35, 23, + 5, 69, 78, 84, 62, 104, 104, 85, 111, 107, 96, 107, 103, 100, 104, 105, 108, + 94, 93, 90, 91, 91, 85, 68, 79, 83, 69, 3, 4, 2, 12, 12, 12, 7, + 21, 17, 13, 14, 10, 21, 16, 8, 21, 18, 13, 10, 13, 0, 64, 68, 5, + 70, 18, 31, 19, 10, 28, 24, 12, 22, 29, 19, 25, 12, 5, 17, 51, 48, + 51, 50, 52, 62, 62, 62, 62, 62, 62, 62, 62, 62, 32, 62, 62, 62, 62, + 62, 62, 51, 35, 34, 6, 80, 74, 94, 120, 60, 60, 62, 54, 51, 50, 38, + 40, 29, 29, 20, 16, 14, 67, 72, 2, 0, 67, 37, 41, 37, 28, 37, 36, + 21, 28, 31, 19, 4, 8, 0, 87, 22, 11, 3, 78, 70, 68, 68, 65, 66, + 70, 67, 68, 65, 4, 67, 64, 1, 3, 62, 62, 62, 62, 60, 55, 48, 31, + 0, 
65, 29, 22, 19, 16, 9, 4, 2, 65, 75, 84, 80, 75, 80, 77, 70, + 73, 81, 69, 65, 3, 65, 64, 4, 4, 62, 62, 62, 60, 55, 50, 39, 23, + 67, 75, 65, 4, 66, 66, 101, 101}, + + { + + 62, 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 26, 57, 60, 54, 14, 111, + 6, 75, 1, 1, 12, 72, 10, 67, 19, 58, 71, 93, 105, 118, 100, 67, 75, + 1, 1, 84, 91, 4, 17, 68, 79, 93, 7, 68, 85, 88, 5, 75, 92, 9, + 69, 80, 88, 65, 73, 73, 79, 70, 5, 22, 0, 0, 0, 81, 86, 97, 70, + 20, 1, 18, 77, 95, 120, 92, 96, 1, 76, 90, 80, 105, 87, 98, 90, 121, + 90, 88, 94, 105, 18, 64, 7, 91, 73, 90, 85, 121, 9, 2, 3, 70, 16, + 68, 68, 86, 84, 76, 74, 75, 9, 13, 67, 13, 20, 80, 0, 69, 67, 6, + 75, 26, 64, 2, 26, 35, 37, 32, 25, 91, 71, 72, 15, 13, 98, 9, 10, + 10, 3, 7, 7, 3, 4, 2, 6, 67, 6, 22, 12, 70, 64, 2, 64, 12, + 21, 19, 11, 13, 4, 5, 4, 26, 19, 18, 77, 3, 70, 33, 23, 19, 14, + 17, 19, 12, 16, 24, 1, 16, 9, 9, 5, 0, 11, 5, 9, 10, 7, 16, + 25, 9, 81, 7, 77, 15, 28, 83, 52, 62, 55, 54, 57, 58, 52, 52, 48, + 39, 32, 29, 37, 24, 73, 67, 4, 68, 30, 28, 25, 30, 28, 21, 17, 19, + 65, 3, 65, 93, 88, 106, 74, 20, 18, 11, 0, 5, 69, 74, 80, 98, 75, + 39, 21, 15, 8, 6, 69, 80, 84, 113, 5, 35, 28, 25, 20, 10, 5, 64, + 73, 88, 70, 35, 32, 20, 4, 6, 71, 80, 94, 5, 57, 48, 43, 34, 23, + 5, 69, 77, 83, 62, 103, 103, 85, 110, 106, 95, 105, 102, 99, 103, 103, 107, + 94, 92, 90, 91, 89, 85, 68, 79, 83, 69, 2, 4, 2, 11, 11, 11, 6, + 21, 16, 13, 13, 10, 21, 15, 8, 20, 18, 12, 10, 12, 0, 65, 68, 5, + 71, 18, 31, 18, 10, 27, 24, 12, 21, 28, 18, 24, 11, 5, 16, 50, 47, + 51, 49, 51, 61, 62, 62, 62, 62, 62, 62, 62, 62, 31, 62, 62, 62, 62, + 62, 62, 49, 34, 33, 6, 79, 74, 93, 118, 58, 58, 62, 52, 49, 48, 37, + 38, 27, 28, 19, 15, 12, 68, 73, 2, 64, 68, 36, 39, 36, 26, 35, 34, + 19, 27, 29, 17, 3, 6, 65, 88, 21, 10, 2, 78, 69, 68, 68, 64, 66, + 69, 66, 67, 64, 5, 66, 0, 3, 4, 62, 62, 62, 62, 58, 52, 45, 28, + 65, 64, 30, 23, 20, 16, 10, 5, 2, 64, 74, 84, 79, 75, 79, 76, 69, + 73, 81, 69, 65, 3, 64, 0, 4, 4, 62, 62, 62, 57, 52, 46, 35, 19, 
+ 69, 75, 65, 4, 65, 65, 99, 99}, + + { + + 62, 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 25, 56, 58, 54, 14, 108, + 5, 74, 1, 1, 11, 72, 9, 68, 18, 56, 73, 94, 106, 115, 99, 67, 74, + 1, 1, 84, 90, 4, 16, 68, 79, 93, 7, 68, 84, 88, 5, 75, 91, 8, + 70, 80, 88, 65, 72, 73, 78, 70, 5, 22, 0, 0, 0, 80, 87, 97, 70, + 19, 1, 18, 77, 95, 119, 91, 94, 2, 75, 89, 79, 104, 85, 97, 89, 119, + 90, 87, 94, 104, 18, 64, 7, 90, 73, 89, 85, 120, 8, 2, 3, 70, 16, + 68, 68, 86, 84, 76, 74, 75, 9, 12, 67, 13, 20, 80, 0, 69, 67, 6, + 75, 26, 65, 2, 26, 34, 36, 31, 24, 90, 71, 72, 14, 12, 98, 8, 10, + 9, 3, 7, 7, 4, 5, 2, 5, 67, 5, 21, 11, 71, 64, 2, 65, 12, + 20, 18, 10, 13, 5, 5, 4, 25, 18, 17, 77, 3, 71, 33, 23, 19, 14, + 17, 19, 12, 16, 23, 1, 16, 9, 9, 5, 64, 11, 5, 9, 10, 7, 16, + 24, 8, 81, 7, 77, 14, 27, 83, 50, 62, 53, 52, 55, 56, 50, 50, 46, + 37, 30, 27, 34, 22, 74, 67, 3, 69, 29, 27, 24, 29, 26, 20, 16, 17, + 65, 2, 66, 93, 88, 105, 74, 20, 18, 11, 0, 5, 69, 74, 80, 97, 74, + 39, 21, 15, 8, 6, 69, 80, 84, 111, 5, 35, 28, 25, 20, 10, 5, 64, + 73, 87, 70, 35, 31, 20, 4, 6, 71, 80, 94, 5, 56, 47, 42, 33, 23, + 5, 69, 77, 83, 62, 102, 102, 84, 108, 105, 94, 104, 100, 98, 101, 102, 105, + 93, 92, 89, 90, 88, 84, 69, 79, 82, 69, 2, 3, 1, 10, 10, 10, 5, + 20, 16, 13, 13, 9, 20, 15, 8, 19, 17, 12, 9, 11, 64, 65, 68, 5, + 71, 17, 30, 17, 10, 26, 23, 12, 20, 27, 18, 24, 10, 4, 14, 49, 47, + 50, 48, 49, 60, 62, 62, 62, 62, 62, 62, 62, 62, 29, 62, 62, 62, 62, + 62, 62, 47, 33, 31, 6, 78, 73, 92, 116, 57, 56, 60, 51, 47, 46, 35, + 36, 26, 26, 17, 13, 11, 69, 74, 1, 64, 69, 34, 38, 34, 25, 33, 32, + 18, 25, 27, 16, 2, 5, 66, 88, 20, 10, 1, 78, 69, 67, 67, 64, 65, + 68, 66, 66, 0, 6, 65, 1, 4, 5, 62, 62, 62, 61, 55, 49, 42, 25, + 68, 64, 30, 23, 20, 17, 10, 5, 3, 64, 74, 83, 79, 74, 79, 75, 68, + 73, 80, 68, 64, 3, 64, 1, 4, 4, 62, 62, 61, 54, 49, 42, 31, 16, + 72, 75, 65, 4, 65, 65, 98, 97}, + + { + + 62, 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 23, 54, 57, 54, 14, 106, + 5, 73, 2, 1, 11, 
71, 8, 69, 18, 54, 75, 95, 106, 112, 97, 67, 73, + 2, 1, 84, 89, 4, 16, 68, 79, 92, 7, 69, 84, 88, 5, 75, 90, 8, + 70, 80, 88, 64, 72, 72, 78, 69, 5, 22, 0, 0, 0, 80, 87, 97, 69, + 18, 1, 18, 76, 95, 117, 89, 93, 4, 73, 87, 78, 103, 84, 96, 88, 117, + 89, 87, 93, 103, 18, 64, 7, 90, 73, 89, 84, 118, 8, 2, 3, 70, 16, + 68, 67, 85, 84, 76, 74, 74, 9, 12, 67, 13, 20, 79, 0, 68, 67, 6, + 75, 25, 65, 2, 25, 33, 36, 30, 23, 89, 70, 72, 13, 12, 97, 8, 10, + 9, 3, 7, 7, 5, 5, 2, 5, 67, 5, 20, 11, 72, 64, 2, 65, 11, + 19, 18, 10, 12, 5, 5, 4, 24, 18, 16, 77, 3, 71, 32, 23, 19, 14, + 17, 19, 12, 16, 23, 1, 16, 9, 9, 5, 64, 11, 5, 8, 10, 7, 15, + 23, 8, 81, 6, 77, 13, 26, 83, 49, 61, 52, 51, 53, 54, 48, 48, 44, + 35, 28, 25, 32, 21, 75, 67, 3, 69, 28, 26, 23, 28, 25, 18, 15, 16, + 66, 2, 66, 93, 88, 105, 74, 20, 18, 11, 0, 5, 68, 73, 79, 96, 74, + 39, 21, 15, 8, 6, 68, 79, 83, 109, 5, 35, 28, 25, 20, 10, 5, 64, + 73, 86, 70, 36, 31, 19, 4, 6, 71, 80, 93, 5, 56, 46, 41, 32, 23, + 5, 69, 77, 82, 62, 101, 101, 83, 107, 104, 93, 103, 99, 97, 100, 100, 103, + 92, 91, 89, 90, 87, 84, 69, 78, 81, 69, 1, 3, 1, 10, 9, 9, 4, + 19, 15, 13, 12, 9, 20, 15, 8, 18, 16, 12, 9, 10, 64, 65, 68, 5, + 71, 16, 30, 17, 10, 25, 22, 12, 19, 26, 17, 23, 9, 3, 12, 48, 46, + 50, 47, 48, 58, 62, 62, 62, 62, 62, 62, 62, 62, 28, 62, 62, 62, 62, + 62, 61, 45, 32, 30, 6, 77, 73, 91, 114, 55, 55, 58, 49, 45, 44, 34, + 34, 25, 24, 16, 11, 9, 70, 75, 1, 64, 70, 33, 36, 32, 23, 32, 31, + 16, 24, 26, 14, 1, 4, 67, 89, 20, 9, 0, 77, 68, 67, 67, 0, 64, + 67, 65, 65, 1, 8, 64, 2, 5, 7, 62, 62, 62, 58, 53, 46, 39, 22, + 70, 64, 31, 24, 21, 17, 11, 5, 3, 0, 73, 83, 79, 73, 78, 74, 67, + 72, 79, 68, 64, 3, 0, 2, 4, 4, 62, 62, 58, 51, 46, 39, 27, 12, + 75, 75, 65, 4, 65, 65, 96, 95}, + + { + + 62, 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 22, 53, 56, 54, 14, 104, + 5, 73, 3, 1, 10, 71, 7, 70, 17, 53, 76, 96, 107, 109, 96, 67, 73, + 3, 1, 83, 88, 5, 15, 67, 78, 91, 6, 69, 84, 88, 5, 74, 90, 8, + 70, 79, 88, 64, 72, 72, 78, 
69, 5, 22, 0, 0, 0, 79, 87, 97, 69, + 18, 0, 18, 76, 94, 115, 87, 91, 5, 71, 86, 77, 101, 83, 95, 88, 116, + 89, 87, 93, 103, 19, 64, 7, 89, 72, 88, 84, 117, 8, 1, 2, 71, 16, + 68, 67, 85, 84, 75, 74, 74, 9, 12, 66, 13, 19, 79, 0, 68, 67, 6, + 75, 24, 65, 2, 24, 32, 35, 30, 23, 89, 70, 72, 13, 11, 97, 7, 10, + 9, 3, 7, 7, 5, 5, 2, 5, 66, 5, 19, 11, 72, 65, 2, 66, 11, + 18, 18, 10, 11, 5, 5, 4, 22, 18, 15, 77, 3, 72, 31, 23, 18, 14, + 17, 19, 12, 16, 23, 1, 15, 9, 8, 5, 64, 10, 4, 8, 9, 6, 14, + 22, 7, 81, 6, 76, 12, 24, 83, 47, 59, 50, 49, 51, 52, 46, 46, 42, + 33, 27, 24, 30, 19, 76, 67, 3, 70, 27, 25, 22, 26, 23, 17, 14, 15, + 67, 1, 67, 93, 88, 104, 73, 20, 18, 11, 1, 5, 68, 73, 79, 95, 73, + 38, 21, 15, 8, 7, 68, 78, 82, 107, 5, 36, 28, 25, 20, 11, 5, 64, + 72, 86, 70, 36, 31, 19, 4, 6, 70, 79, 92, 5, 55, 46, 40, 32, 23, + 5, 68, 76, 82, 62, 101, 100, 83, 106, 103, 92, 101, 98, 96, 99, 99, 102, + 92, 90, 89, 89, 85, 84, 70, 78, 81, 69, 1, 2, 1, 9, 8, 8, 3, + 19, 15, 13, 12, 8, 19, 14, 8, 18, 16, 11, 9, 10, 64, 66, 68, 5, + 72, 16, 29, 16, 9, 24, 22, 13, 19, 25, 17, 23, 9, 3, 11, 47, 45, + 49, 46, 47, 57, 62, 62, 62, 62, 62, 62, 62, 61, 27, 62, 62, 62, 62, + 62, 59, 43, 31, 29, 6, 76, 73, 89, 111, 53, 53, 56, 47, 43, 42, 32, + 32, 23, 23, 15, 10, 8, 71, 76, 0, 65, 71, 32, 35, 31, 21, 30, 29, + 15, 22, 24, 13, 64, 2, 69, 90, 19, 8, 64, 77, 68, 67, 66, 0, 64, + 65, 64, 64, 2, 9, 1, 3, 7, 8, 62, 62, 60, 56, 50, 44, 36, 20, + 72, 0, 31, 24, 21, 17, 12, 6, 3, 0, 73, 83, 78, 73, 78, 73, 66, + 72, 79, 67, 0, 3, 0, 3, 4, 4, 62, 62, 56, 48, 42, 35, 24, 9, + 77, 75, 65, 4, 64, 64, 95, 92}, + + { + + 62, 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 20, 51, 54, 54, 14, 101, + 4, 72, 3, 1, 9, 71, 6, 71, 17, 51, 78, 97, 107, 106, 95, 67, 72, + 3, 1, 83, 87, 5, 15, 67, 78, 91, 6, 70, 83, 88, 5, 74, 89, 7, + 70, 79, 88, 0, 71, 72, 77, 68, 5, 22, 0, 0, 0, 79, 87, 97, 68, + 17, 0, 18, 76, 94, 114, 85, 90, 7, 69, 85, 76, 100, 81, 94, 87, 114, + 88, 86, 92, 102, 19, 64, 7, 89, 72, 88, 83, 
115, 7, 1, 2, 71, 16, + 68, 67, 84, 84, 75, 74, 73, 9, 11, 66, 13, 19, 79, 0, 68, 67, 6, + 75, 24, 65, 2, 24, 31, 34, 29, 22, 88, 69, 72, 12, 11, 96, 7, 10, + 8, 3, 7, 7, 6, 6, 2, 5, 66, 5, 18, 11, 73, 65, 2, 66, 10, + 17, 17, 10, 11, 6, 5, 4, 21, 17, 14, 77, 3, 72, 31, 23, 18, 14, + 17, 19, 12, 16, 23, 1, 15, 9, 8, 5, 64, 10, 4, 7, 9, 6, 14, + 21, 7, 81, 5, 76, 11, 23, 83, 46, 57, 48, 47, 49, 50, 44, 44, 40, + 31, 25, 22, 27, 17, 77, 67, 2, 70, 26, 24, 21, 25, 22, 15, 13, 14, + 67, 1, 67, 93, 88, 104, 73, 20, 18, 11, 1, 5, 68, 73, 78, 94, 73, + 38, 21, 15, 8, 7, 67, 77, 82, 105, 5, 36, 28, 25, 20, 11, 5, 64, + 72, 85, 70, 36, 30, 18, 4, 6, 70, 79, 92, 5, 55, 45, 39, 31, 23, + 5, 68, 76, 81, 62, 100, 99, 82, 104, 102, 91, 100, 96, 95, 97, 97, 100, + 91, 89, 88, 89, 84, 83, 70, 78, 80, 69, 0, 2, 0, 8, 7, 7, 2, + 18, 14, 13, 11, 8, 19, 14, 8, 17, 15, 11, 8, 9, 64, 66, 68, 5, + 72, 15, 29, 15, 9, 23, 21, 13, 18, 24, 16, 22, 8, 2, 9, 46, 45, + 49, 45, 45, 55, 62, 62, 62, 62, 62, 62, 62, 59, 25, 62, 62, 62, 62, + 62, 56, 41, 30, 28, 6, 75, 72, 88, 109, 52, 51, 54, 46, 41, 40, 31, + 30, 22, 21, 13, 8, 6, 72, 77, 0, 65, 72, 30, 33, 29, 20, 28, 27, + 13, 21, 22, 11, 65, 1, 70, 90, 18, 8, 65, 77, 67, 66, 66, 1, 0, + 64, 0, 0, 3, 10, 2, 4, 8, 9, 62, 61, 58, 53, 48, 41, 33, 17, + 74, 0, 32, 25, 22, 18, 13, 6, 4, 1, 72, 82, 78, 72, 77, 72, 65, + 72, 78, 67, 0, 3, 1, 4, 4, 4, 62, 62, 53, 45, 39, 31, 20, 5, + 80, 75, 65, 4, 64, 64, 93, 90}, + + { + + 62, 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 19, 50, 53, 54, 14, 99, 4, + 71, 4, 1, 8, 71, 5, 73, 16, 49, 80, 98, 108, 104, 94, 67, 71, 4, 1, + 83, 86, 5, 14, 67, 78, 90, 5, 70, 83, 89, 5, 74, 89, 7, 71, 79, 88, + 0, 71, 72, 77, 68, 5, 22, 0, 0, 0, 78, 88, 97, 68, 16, 0, 18, 76, + 94, 112, 84, 88, 8, 68, 84, 75, 99, 80, 93, 86, 112, 88, 86, 92, 101, 19, + 64, 7, 88, 72, 87, 83, 114, 7, 0, 1, 72, 16, 68, 67, 84, 84, 75, 74, + 73, 8, 11, 66, 13, 18, 79, 0, 68, 67, 5, 75, 23, 66, 2, 23, 29, 33, + 28, 21, 88, 69, 72, 11, 10, 96, 6, 9, 8, 
3, 7, 7, 7, 6, 2, 4, + 66, 4, 17, 10, 74, 65, 2, 67, 10, 16, 17, 9, 10, 6, 5, 4, 19, 17, + 13, 77, 3, 73, 30, 22, 18, 14, 17, 18, 11, 16, 22, 0, 14, 9, 8, 4, + 65, 9, 3, 7, 8, 5, 13, 20, 6, 81, 5, 76, 10, 21, 83, 44, 55, 46, + 45, 47, 47, 42, 42, 38, 29, 23, 20, 25, 15, 78, 67, 2, 71, 25, 22, 19, + 23, 20, 14, 11, 12, 68, 0, 68, 93, 88, 103, 73, 20, 18, 11, 1, 5, 68, + 73, 78, 93, 72, 38, 21, 15, 8, 7, 67, 77, 81, 104, 5, 36, 28, 25, 19, + 11, 5, 64, 72, 85, 70, 36, 30, 18, 4, 6, 70, 79, 91, 5, 54, 44, 38, + 30, 22, 5, 68, 76, 81, 62, 99, 98, 82, 103, 101, 91, 99, 95, 94, 96, 96, + 99, 91, 89, 88, 88, 83, 83, 71, 78, 80, 69, 0, 1, 0, 7, 6, 5, 1, + 17, 14, 13, 11, 7, 18, 13, 7, 16, 14, 10, 8, 8, 65, 67, 68, 5, 73, + 14, 28, 14, 9, 22, 20, 13, 17, 23, 16, 22, 7, 1, 7, 45, 44, 48, 43, + 44, 54, 62, 62, 62, 62, 62, 62, 62, 56, 24, 62, 62, 62, 62, 61, 54, 39, + 28, 26, 6, 75, 72, 87, 107, 50, 49, 52, 44, 38, 38, 29, 28, 20, 19, 12, + 6, 5, 73, 78, 64, 66, 73, 29, 32, 27, 18, 26, 25, 12, 19, 20, 10, 66, + 64, 72, 91, 17, 7, 66, 77, 67, 66, 65, 1, 0, 0, 0, 1, 4, 11, 3, + 5, 9, 10, 61, 59, 56, 51, 45, 38, 30, 14, 77, 0, 32, 25, 22, 18, 13, + 6, 4, 1, 72, 82, 78, 72, 77, 71, 64, 72, 78, 66, 1, 3, 1, 4, 4, + 3, 62, 61, 51, 42, 36, 27, 16, 2, 83, 75, 66, 3, 64, 64, 92, 88}, + + { + + 62, 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 18, 49, 52, 54, 14, 97, 4, + 70, 5, 1, 8, 70, 4, 74, 15, 47, 81, 99, 109, 101, 92, 67, 70, 5, 1, + 82, 85, 6, 13, 67, 77, 89, 5, 70, 83, 89, 5, 74, 88, 7, 71, 79, 88, + 0, 71, 71, 77, 68, 5, 22, 0, 0, 0, 77, 88, 97, 68, 15, 0, 18, 75, + 94, 110, 82, 86, 9, 66, 82, 74, 97, 79, 91, 85, 110, 88, 86, 92, 100, 19, + 64, 7, 87, 72, 86, 82, 113, 7, 0, 1, 72, 16, 68, 66, 83, 83, 74, 74, + 73, 8, 11, 66, 13, 18, 78, 0, 67, 67, 5, 74, 22, 66, 2, 22, 28, 33, + 27, 20, 87, 69, 71, 10, 9, 96, 5, 9, 8, 4, 7, 7, 8, 6, 2, 4, + 65, 4, 17, 10, 75, 65, 2, 68, 10, 15, 17, 9, 9, 6, 5, 4, 18, 17, + 13, 77, 3, 74, 29, 22, 18, 14, 17, 18, 11, 16, 22, 0, 14, 9, 8, 4, + 65, 9, 3, 
7, 8, 5, 12, 20, 6, 81, 5, 76, 9, 20, 83, 42, 54, 45, + 44, 45, 45, 41, 41, 36, 27, 22, 19, 23, 14, 79, 67, 2, 72, 24, 21, 18, + 22, 19, 13, 10, 11, 69, 64, 69, 93, 87, 102, 72, 21, 18, 11, 1, 6, 67, + 72, 77, 92, 71, 38, 21, 15, 8, 8, 67, 76, 80, 102, 5, 36, 28, 25, 19, + 12, 5, 64, 72, 84, 70, 37, 30, 18, 4, 7, 70, 79, 90, 5, 54, 44, 38, + 29, 22, 5, 68, 75, 80, 62, 98, 97, 81, 102, 99, 90, 97, 94, 92, 95, 95, + 97, 90, 88, 88, 87, 81, 83, 72, 77, 79, 69, 0, 0, 0, 7, 5, 4, 0, + 17, 14, 13, 11, 7, 17, 13, 7, 15, 14, 10, 8, 7, 65, 67, 67, 6, 73, + 14, 27, 14, 9, 22, 20, 13, 16, 22, 16, 22, 6, 1, 6, 45, 43, 47, 42, + 43, 53, 60, 60, 62, 62, 62, 62, 62, 54, 23, 62, 62, 62, 62, 58, 52, 38, + 27, 25, 6, 74, 72, 86, 105, 48, 48, 50, 42, 36, 37, 28, 26, 19, 18, 11, + 5, 4, 74, 78, 64, 66, 74, 28, 31, 26, 16, 25, 24, 11, 18, 19, 9, 67, + 65, 73, 92, 17, 6, 66, 76, 67, 66, 64, 2, 1, 1, 1, 2, 5, 13, 4, + 6, 11, 12, 60, 58, 54, 49, 42, 35, 27, 11, 79, 1, 32, 25, 23, 18, 14, + 7, 4, 2, 71, 82, 77, 71, 77, 70, 1, 71, 77, 65, 2, 3, 2, 5, 4, + 3, 62, 59, 49, 40, 33, 24, 12, 64, 85, 75, 66, 3, 0, 0, 91, 86}, + + { + + 62, 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 16, 47, 50, 54, 14, 94, 3, + 69, 5, 1, 7, 70, 3, 75, 15, 45, 83, 100, 109, 98, 91, 67, 69, 5, 1, + 82, 84, 6, 13, 67, 77, 89, 5, 71, 82, 89, 5, 74, 87, 6, 71, 79, 88, + 1, 70, 71, 76, 67, 5, 22, 0, 0, 0, 77, 88, 97, 67, 14, 0, 18, 75, + 94, 109, 80, 85, 11, 64, 81, 73, 96, 77, 90, 84, 108, 87, 85, 91, 99, 19, + 64, 7, 87, 72, 86, 82, 111, 6, 0, 1, 72, 16, 68, 66, 83, 83, 74, 74, + 72, 8, 10, 66, 13, 18, 78, 0, 67, 67, 5, 74, 22, 66, 2, 22, 27, 32, + 26, 19, 86, 68, 71, 9, 9, 95, 5, 9, 7, 4, 7, 7, 9, 7, 2, 4, + 65, 4, 16, 10, 76, 65, 2, 68, 9, 14, 16, 9, 9, 7, 5, 4, 17, 16, + 12, 77, 3, 74, 29, 22, 18, 14, 17, 18, 11, 16, 22, 0, 14, 9, 8, 4, + 65, 9, 3, 6, 8, 5, 12, 19, 5, 81, 4, 76, 8, 19, 83, 41, 52, 43, + 42, 43, 43, 39, 39, 34, 25, 20, 17, 20, 12, 80, 67, 1, 72, 23, 20, 17, + 21, 17, 11, 9, 10, 69, 64, 69, 93, 87, 102, 
72, 21, 18, 11, 1, 6, 67, + 72, 77, 91, 71, 38, 21, 15, 8, 8, 66, 75, 80, 100, 5, 36, 28, 25, 19, + 12, 5, 64, 72, 83, 70, 37, 29, 17, 4, 7, 70, 79, 90, 5, 53, 43, 37, + 28, 22, 5, 68, 75, 80, 62, 97, 96, 80, 100, 98, 89, 96, 92, 91, 93, 93, + 95, 89, 87, 87, 87, 80, 82, 72, 77, 78, 69, 64, 0, 64, 6, 4, 3, 64, + 16, 13, 13, 10, 6, 17, 13, 7, 14, 13, 10, 7, 6, 65, 67, 67, 6, 73, + 13, 27, 13, 9, 21, 19, 13, 15, 21, 15, 21, 5, 0, 4, 44, 43, 47, 41, + 41, 51, 58, 58, 62, 62, 62, 62, 62, 52, 21, 59, 62, 59, 62, 56, 49, 36, + 26, 24, 6, 73, 71, 85, 103, 47, 46, 48, 41, 34, 35, 26, 24, 18, 16, 9, + 3, 2, 75, 79, 65, 66, 75, 26, 29, 24, 15, 23, 22, 9, 16, 17, 7, 68, + 66, 74, 92, 16, 6, 67, 76, 66, 65, 64, 2, 2, 2, 2, 3, 6, 14, 5, + 7, 12, 13, 60, 56, 52, 46, 40, 32, 24, 8, 81, 1, 33, 26, 23, 19, 15, + 7, 5, 2, 71, 81, 77, 70, 76, 69, 2, 71, 76, 65, 2, 3, 2, 6, 4, + 3, 62, 57, 46, 37, 30, 20, 8, 68, 88, 75, 66, 3, 0, 0, 89, 84}, + + { + + 62, 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 15, 46, 49, 54, 14, 92, 3, + 69, 6, 1, 6, 70, 2, 76, 14, 44, 84, 101, 110, 95, 90, 67, 69, 6, 1, + 81, 83, 7, 12, 66, 76, 88, 4, 71, 82, 89, 5, 73, 87, 6, 71, 78, 88, + 1, 70, 71, 76, 67, 5, 22, 0, 0, 0, 76, 88, 97, 67, 14, 64, 18, 75, + 93, 107, 78, 83, 12, 1, 80, 72, 94, 76, 89, 84, 107, 87, 85, 91, 99, 20, + 64, 7, 86, 71, 85, 81, 110, 6, 64, 0, 73, 16, 68, 66, 82, 83, 73, 74, + 72, 8, 10, 65, 13, 17, 78, 0, 67, 67, 5, 74, 21, 66, 2, 21, 26, 31, + 26, 19, 86, 68, 71, 9, 8, 95, 4, 9, 7, 4, 7, 7, 9, 7, 2, 4, + 64, 4, 15, 10, 76, 66, 2, 69, 9, 13, 16, 9, 8, 7, 5, 4, 15, 16, + 11, 77, 3, 75, 28, 22, 17, 14, 17, 18, 11, 16, 22, 0, 13, 9, 7, 4, + 65, 8, 2, 6, 7, 4, 11, 18, 5, 81, 4, 75, 7, 17, 83, 39, 50, 41, + 40, 41, 41, 37, 37, 32, 23, 19, 16, 18, 10, 81, 67, 1, 73, 22, 19, 16, + 19, 16, 10, 8, 9, 70, 65, 70, 93, 87, 101, 71, 21, 18, 11, 2, 6, 67, + 72, 76, 90, 70, 37, 21, 15, 8, 9, 66, 74, 79, 98, 5, 37, 28, 25, 19, + 13, 5, 64, 71, 83, 70, 37, 29, 17, 4, 7, 69, 78, 89, 5, 53, 43, 36, + 28, 22, 
5, 67, 74, 79, 62, 97, 95, 80, 99, 97, 88, 94, 91, 90, 92, 92, + 94, 89, 86, 87, 86, 78, 82, 73, 77, 78, 69, 64, 64, 64, 5, 3, 2, 65, + 16, 13, 13, 10, 6, 16, 12, 7, 14, 13, 9, 7, 6, 65, 68, 67, 6, 74, + 13, 26, 12, 8, 20, 19, 14, 15, 20, 15, 21, 5, 0, 3, 43, 42, 46, 40, + 40, 50, 56, 56, 61, 60, 62, 62, 60, 49, 20, 57, 62, 56, 62, 53, 47, 34, + 25, 23, 6, 72, 71, 83, 100, 45, 44, 46, 39, 32, 33, 25, 22, 16, 15, 8, + 2, 1, 76, 80, 65, 67, 76, 25, 28, 23, 13, 21, 20, 8, 15, 15, 6, 70, + 68, 76, 93, 15, 5, 68, 76, 66, 65, 0, 3, 2, 4, 3, 4, 7, 15, 7, + 8, 14, 14, 59, 55, 50, 44, 37, 30, 21, 6, 83, 2, 33, 26, 24, 19, 16, + 8, 5, 3, 70, 81, 76, 70, 76, 68, 3, 71, 76, 64, 3, 3, 3, 7, 4, + 3, 62, 55, 44, 34, 26, 16, 5, 71, 90, 75, 66, 3, 1, 1, 88, 81}, + + { + + 62, 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 13, 44, 48, 54, 14, 90, 3, + 68, 7, 1, 5, 70, 1, 77, 14, 42, 86, 102, 110, 92, 89, 67, 68, 7, 1, + 81, 82, 7, 12, 66, 76, 87, 4, 72, 82, 89, 5, 73, 86, 6, 72, 78, 88, + 2, 70, 71, 76, 66, 5, 22, 0, 0, 0, 76, 89, 97, 66, 13, 64, 18, 75, + 93, 105, 77, 82, 14, 2, 79, 71, 93, 75, 88, 83, 105, 86, 85, 90, 98, 20, + 64, 7, 86, 71, 85, 81, 108, 6, 64, 0, 73, 16, 68, 66, 82, 83, 73, 74, + 71, 8, 10, 65, 13, 17, 78, 0, 67, 67, 5, 74, 20, 67, 2, 20, 25, 30, + 25, 18, 85, 67, 71, 8, 8, 94, 4, 9, 7, 4, 7, 7, 10, 7, 2, 3, + 64, 3, 14, 9, 77, 66, 2, 69, 8, 12, 16, 8, 7, 7, 5, 4, 14, 16, + 10, 77, 3, 75, 27, 22, 17, 14, 17, 18, 11, 16, 21, 0, 13, 9, 7, 4, + 66, 8, 2, 5, 7, 4, 10, 17, 4, 81, 3, 75, 6, 16, 83, 38, 48, 39, + 38, 39, 39, 35, 35, 30, 21, 17, 14, 16, 8, 82, 67, 1, 73, 21, 18, 15, + 18, 14, 8, 7, 7, 71, 65, 70, 93, 87, 101, 71, 21, 18, 11, 2, 6, 67, + 72, 76, 89, 70, 37, 21, 15, 8, 9, 65, 74, 78, 96, 5, 37, 28, 25, 19, + 13, 5, 64, 71, 82, 70, 37, 29, 16, 4, 7, 69, 78, 88, 5, 52, 42, 35, + 27, 22, 5, 67, 74, 79, 62, 96, 94, 79, 98, 96, 87, 93, 90, 89, 91, 90, + 92, 88, 86, 87, 86, 77, 82, 73, 77, 77, 69, 65, 64, 64, 4, 2, 1, 66, + 15, 12, 13, 9, 5, 16, 12, 7, 13, 12, 9, 7, 5, 
66, 68, 67, 6, 74, + 12, 26, 11, 8, 19, 18, 14, 14, 19, 14, 20, 4, 64, 1, 42, 41, 46, 39, + 39, 48, 54, 54, 59, 57, 62, 62, 57, 47, 19, 54, 62, 53, 58, 50, 44, 32, + 24, 21, 6, 71, 71, 82, 98, 43, 42, 44, 37, 30, 31, 23, 20, 15, 13, 7, + 0, 64, 77, 81, 66, 67, 77, 24, 26, 21, 11, 19, 18, 6, 13, 13, 4, 71, + 69, 77, 94, 14, 4, 69, 76, 65, 65, 0, 3, 3, 5, 3, 5, 8, 16, 8, + 9, 15, 15, 59, 53, 48, 41, 35, 27, 18, 3, 86, 2, 34, 27, 24, 19, 16, + 8, 5, 3, 70, 81, 76, 69, 75, 67, 4, 71, 75, 64, 3, 3, 3, 8, 4, + 3, 61, 53, 41, 31, 23, 12, 1, 75, 93, 75, 66, 3, 1, 1, 86, 79}, + + { + + 62, 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 12, 43, 46, 54, 14, 87, 2, + 67, 7, 1, 5, 69, 0, 78, 13, 40, 88, 103, 111, 89, 87, 67, 67, 7, 1, + 81, 81, 7, 11, 66, 76, 87, 4, 72, 81, 89, 5, 73, 85, 5, 72, 78, 88, + 2, 69, 70, 75, 66, 5, 22, 0, 0, 0, 75, 89, 97, 66, 12, 64, 18, 74, + 93, 104, 75, 80, 15, 4, 77, 70, 92, 73, 87, 82, 103, 86, 84, 90, 97, 20, + 64, 7, 85, 71, 84, 80, 107, 5, 64, 0, 73, 16, 68, 65, 81, 83, 73, 74, + 71, 8, 9, 65, 13, 17, 77, 0, 66, 67, 5, 74, 20, 67, 2, 20, 24, 30, + 24, 17, 84, 67, 71, 7, 7, 94, 3, 9, 6, 4, 7, 7, 11, 8, 2, 3, + 64, 3, 13, 9, 78, 66, 2, 70, 8, 11, 15, 8, 7, 8, 5, 4, 13, 15, + 9, 77, 3, 76, 27, 22, 17, 14, 17, 18, 11, 16, 21, 0, 13, 9, 7, 4, + 66, 8, 2, 5, 7, 4, 10, 16, 4, 81, 3, 75, 5, 15, 83, 36, 46, 38, + 37, 37, 37, 33, 33, 28, 19, 15, 12, 13, 7, 83, 67, 0, 74, 20, 17, 14, + 17, 13, 7, 6, 6, 71, 66, 71, 93, 87, 100, 71, 21, 18, 11, 2, 6, 66, + 71, 75, 88, 69, 37, 21, 15, 8, 9, 65, 73, 78, 94, 5, 37, 28, 25, 19, + 13, 5, 64, 71, 81, 70, 38, 28, 16, 4, 7, 69, 78, 88, 5, 52, 41, 34, + 26, 22, 5, 67, 74, 78, 62, 95, 93, 78, 96, 95, 86, 92, 88, 88, 89, 89, + 90, 87, 85, 86, 85, 76, 81, 74, 76, 76, 69, 65, 65, 65, 4, 1, 0, 67, + 14, 12, 13, 9, 5, 15, 12, 7, 12, 11, 9, 6, 4, 66, 68, 67, 6, 74, + 11, 25, 11, 8, 18, 17, 14, 13, 18, 14, 20, 3, 65, 64, 41, 41, 45, 38, + 37, 47, 52, 52, 57, 55, 62, 61, 54, 45, 17, 51, 62, 50, 54, 48, 42, 30, + 23, 20, 6, 70, 
70, 81, 96, 42, 41, 42, 36, 28, 29, 22, 18, 14, 11, 5, + 65, 65, 78, 82, 66, 67, 78, 22, 25, 19, 10, 18, 17, 5, 12, 12, 3, 72, + 70, 78, 94, 14, 4, 70, 75, 65, 64, 1, 4, 4, 6, 4, 6, 9, 18, 9, + 10, 16, 17, 58, 51, 46, 39, 32, 24, 15, 0, 88, 2, 34, 27, 25, 20, 17, + 8, 6, 4, 69, 80, 76, 68, 75, 66, 5, 70, 74, 0, 4, 3, 4, 9, 4, + 3, 59, 51, 39, 28, 20, 9, 66, 78, 96, 75, 66, 3, 1, 1, 85, 77}, + + { + + 61, 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 10, 41, 45, 54, 14, 85, 2, + 66, 8, 1, 4, 69, 64, 79, 13, 38, 89, 104, 111, 86, 86, 67, 66, 8, 1, + 80, 80, 8, 11, 66, 75, 86, 3, 73, 81, 89, 5, 73, 85, 5, 72, 78, 88, + 3, 69, 70, 75, 65, 5, 22, 0, 0, 0, 75, 89, 97, 65, 11, 64, 18, 74, + 93, 102, 73, 79, 17, 6, 76, 69, 90, 72, 86, 81, 101, 85, 84, 89, 96, 20, + 64, 7, 85, 71, 84, 80, 105, 5, 65, 64, 74, 16, 68, 65, 81, 83, 72, 74, + 70, 8, 9, 65, 13, 16, 77, 0, 66, 67, 5, 74, 19, 67, 2, 19, 23, 29, + 23, 16, 84, 66, 71, 6, 7, 93, 3, 9, 6, 4, 7, 7, 12, 8, 2, 3, + 0, 3, 12, 9, 79, 66, 2, 70, 7, 10, 15, 8, 6, 8, 5, 4, 11, 15, + 8, 77, 3, 76, 26, 22, 17, 14, 17, 18, 11, 16, 21, 0, 12, 9, 7, 4, + 66, 7, 1, 4, 6, 3, 9, 15, 3, 81, 2, 75, 4, 13, 83, 35, 44, 36, + 35, 35, 35, 31, 31, 26, 17, 14, 11, 11, 5, 84, 67, 0, 74, 19, 16, 13, + 15, 11, 5, 5, 5, 72, 66, 71, 93, 87, 100, 70, 21, 18, 11, 2, 6, 66, + 71, 75, 87, 69, 37, 21, 15, 8, 10, 64, 72, 77, 92, 5, 37, 28, 25, 19, + 14, 5, 64, 71, 81, 70, 38, 28, 15, 4, 7, 69, 78, 87, 5, 51, 41, 33, + 25, 22, 5, 67, 73, 78, 62, 94, 92, 78, 95, 94, 85, 90, 87, 87, 88, 87, + 89, 87, 84, 86, 85, 74, 81, 74, 76, 76, 69, 66, 65, 65, 3, 0, 64, 68, + 14, 11, 13, 8, 4, 15, 11, 7, 11, 11, 8, 6, 3, 66, 69, 67, 6, 75, + 11, 25, 10, 8, 17, 17, 14, 12, 17, 13, 19, 2, 65, 65, 40, 40, 45, 37, + 36, 45, 50, 50, 55, 52, 60, 59, 51, 42, 16, 48, 62, 47, 50, 45, 39, 28, + 22, 19, 6, 69, 70, 80, 94, 40, 39, 40, 34, 26, 27, 20, 16, 12, 10, 4, + 66, 67, 79, 83, 67, 68, 79, 21, 23, 18, 8, 16, 15, 3, 10, 10, 1, 73, + 72, 80, 95, 13, 3, 71, 75, 64, 64, 1, 4, 4, 7, 5, 7, 
10, 19, 10, + 11, 18, 18, 58, 50, 44, 36, 30, 21, 12, 66, 90, 3, 35, 28, 25, 20, 18, + 9, 6, 4, 69, 80, 75, 68, 74, 65, 6, 70, 74, 0, 4, 3, 4, 10, 4, + 3, 58, 49, 36, 25, 17, 5, 70, 82, 98, 75, 66, 3, 2, 2, 83, 75}, + + { + + 60, 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 9, 40, 44, 54, 14, 83, 2, + 65, 9, 1, 3, 69, 65, 80, 12, 36, 91, 105, 112, 83, 85, 67, 65, 9, 1, + 80, 79, 8, 10, 66, 75, 85, 3, 73, 81, 89, 5, 73, 84, 5, 72, 78, 88, + 3, 69, 70, 75, 65, 5, 22, 0, 0, 0, 74, 89, 97, 65, 10, 64, 18, 74, + 93, 100, 71, 77, 18, 8, 75, 68, 89, 71, 85, 80, 99, 85, 84, 89, 95, 20, + 64, 7, 84, 71, 83, 79, 104, 5, 65, 64, 74, 16, 68, 65, 80, 83, 72, 74, + 70, 8, 9, 65, 13, 16, 77, 0, 66, 67, 5, 74, 18, 67, 2, 18, 22, 28, + 22, 15, 83, 66, 71, 5, 6, 93, 2, 9, 6, 4, 7, 7, 13, 8, 2, 3, + 0, 3, 11, 9, 80, 66, 2, 71, 7, 9, 15, 8, 5, 8, 5, 4, 10, 15, + 7, 77, 3, 77, 25, 22, 17, 14, 17, 18, 11, 16, 21, 0, 12, 9, 7, 4, + 66, 7, 1, 4, 6, 3, 8, 14, 3, 81, 2, 75, 3, 12, 83, 33, 42, 34, + 33, 33, 33, 29, 29, 24, 15, 12, 9, 9, 3, 85, 67, 0, 75, 18, 15, 12, + 14, 10, 4, 4, 4, 73, 67, 72, 93, 87, 99, 70, 21, 18, 11, 2, 6, 66, + 71, 74, 86, 68, 37, 21, 15, 8, 10, 64, 71, 76, 90, 5, 37, 28, 25, 19, + 14, 5, 64, 71, 80, 70, 38, 28, 15, 4, 7, 69, 78, 86, 5, 51, 40, 32, + 24, 22, 5, 67, 73, 77, 62, 93, 91, 77, 94, 93, 84, 89, 86, 86, 87, 86, + 87, 86, 83, 86, 84, 73, 81, 75, 76, 75, 69, 66, 66, 65, 2, 64, 65, 69, + 13, 11, 13, 8, 4, 14, 11, 7, 10, 10, 8, 6, 2, 66, 69, 67, 6, 75, + 10, 24, 9, 8, 16, 16, 14, 11, 16, 13, 19, 1, 66, 67, 39, 39, 44, 36, + 35, 44, 48, 48, 53, 50, 57, 56, 48, 40, 15, 45, 59, 44, 46, 42, 37, 26, + 21, 18, 6, 68, 70, 79, 92, 38, 37, 38, 32, 24, 25, 19, 14, 11, 8, 3, + 68, 68, 80, 84, 67, 68, 80, 20, 22, 16, 6, 14, 13, 2, 9, 8, 0, 74, + 73, 81, 96, 12, 2, 72, 75, 64, 64, 2, 5, 5, 8, 6, 8, 11, 20, 11, + 12, 19, 19, 57, 48, 42, 34, 27, 18, 9, 69, 92, 3, 35, 28, 26, 20, 19, + 9, 6, 5, 68, 80, 75, 67, 74, 64, 7, 70, 73, 1, 5, 3, 5, 11, 4, + 3, 57, 47, 34, 22, 14, 1, 74, 85, 
101, 75, 66, 3, 2, 2, 82, 73}, + + { + + 58, 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 7, 38, 42, 53, 14, 81, 1, 65, + 9, 0, 2, 69, 67, 82, 11, 34, 93, 106, 113, 81, 84, 68, 65, 9, 0, 80, 78, + 8, 9, 66, 75, 85, 2, 74, 81, 90, 5, 73, 84, 4, 73, 78, 88, 3, 69, 70, + 75, 65, 4, 22, 0, 0, 0, 74, 90, 97, 65, 9, 65, 18, 74, 93, 99, 70, 76, + 19, 9, 74, 67, 88, 70, 84, 80, 98, 85, 84, 89, 95, 20, 64, 7, 84, 71, 83, + 79, 103, 4, 66, 65, 75, 16, 68, 65, 80, 83, 72, 74, 70, 7, 8, 65, 12, 15, + 77, 64, 66, 67, 4, 74, 17, 68, 1, 17, 20, 27, 21, 14, 83, 66, 71, 4, 5, + 93, 1, 8, 5, 4, 7, 7, 13, 8, 2, 2, 0, 2, 10, 8, 81, 67, 1, 72, + 6, 8, 14, 7, 4, 8, 5, 4, 8, 14, 6, 77, 3, 78, 24, 21, 16, 14, 17, + 17, 10, 16, 20, 64, 11, 9, 6, 3, 67, 6, 0, 3, 5, 2, 7, 13, 2, 81, + 1, 75, 2, 10, 83, 31, 40, 32, 31, 31, 30, 27, 27, 22, 13, 10, 7, 6, 1, + 87, 68, 64, 76, 17, 13, 10, 12, 8, 2, 2, 2, 74, 68, 73, 93, 87, 99, 70, + 21, 18, 11, 2, 6, 66, 71, 74, 85, 68, 36, 21, 15, 8, 10, 64, 71, 76, 89, + 4, 37, 28, 24, 18, 14, 5, 64, 71, 80, 70, 38, 27, 14, 3, 7, 69, 78, 86, + 5, 50, 39, 31, 23, 21, 5, 67, 73, 77, 62, 93, 90, 77, 93, 92, 84, 88, 85, + 85, 86, 85, 86, 86, 83, 86, 84, 72, 81, 76, 76, 75, 69, 67, 67, 66, 1, 65, + 67, 71, 12, 10, 13, 7, 3, 13, 10, 6, 9, 9, 7, 5, 1, 67, 70, 67, 6, + 76, 9, 23, 8, 7, 15, 15, 14, 10, 14, 12, 18, 0, 67, 69, 38, 38, 43, 34, + 33, 42, 46, 46, 50, 47, 54, 53, 45, 37, 13, 42, 55, 41, 41, 39, 34, 24, 19, + 16, 6, 68, 70, 78, 90, 36, 35, 36, 30, 21, 23, 17, 11, 9, 6, 1, 70, 70, + 81, 85, 68, 69, 82, 18, 20, 14, 4, 12, 11, 0, 7, 6, 65, 76, 75, 83, 97, + 11, 1, 73, 75, 64, 64, 2, 5, 5, 9, 6, 9, 11, 21, 12, 13, 20, 20, 56, + 46, 39, 31, 24, 15, 5, 72, 95, 3, 35, 28, 26, 20, 19, 9, 6, 5, 68, 80, + 75, 67, 74, 0, 8, 70, 73, 1, 5, 3, 5, 11, 4, 2, 55, 44, 31, 19, 10, + 66, 78, 89, 104, 75, 67, 2, 2, 2, 81, 71}, + + { + + 57, 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 6, 37, 41, 53, 14, 78, 1, 64, + 10, 0, 2, 68, 68, 83, 11, 33, 94, 107, 113, 78, 82, 68, 64, 10, 0, 79, 76, + 9, 
9, 65, 74, 84, 2, 74, 80, 90, 5, 72, 83, 4, 73, 77, 88, 4, 68, 69, + 74, 64, 4, 22, 0, 0, 0, 73, 90, 97, 64, 9, 65, 18, 73, 92, 97, 68, 74, + 21, 11, 72, 66, 86, 68, 82, 79, 96, 84, 83, 88, 94, 21, 0, 8, 83, 70, 82, + 78, 101, 4, 66, 65, 75, 17, 68, 64, 79, 82, 71, 73, 69, 7, 8, 64, 12, 15, + 76, 64, 65, 67, 4, 73, 17, 68, 1, 17, 19, 27, 21, 14, 82, 65, 70, 4, 5, + 92, 1, 8, 5, 5, 7, 7, 14, 9, 3, 2, 1, 2, 10, 8, 81, 67, 1, 72, + 6, 7, 14, 7, 4, 9, 6, 4, 7, 14, 6, 76, 3, 78, 24, 21, 16, 14, 17, + 17, 10, 16, 20, 64, 11, 9, 6, 3, 67, 6, 0, 3, 5, 2, 7, 13, 2, 80, + 1, 74, 2, 9, 82, 30, 39, 31, 30, 29, 28, 26, 26, 20, 12, 9, 6, 4, 0, + 88, 68, 64, 76, 16, 12, 9, 11, 7, 1, 1, 1, 74, 68, 73, 92, 86, 98, 69, + 22, 18, 11, 3, 7, 65, 70, 73, 83, 67, 36, 21, 15, 8, 11, 0, 70, 75, 87, + 4, 38, 29, 24, 18, 15, 5, 64, 70, 79, 70, 39, 27, 14, 3, 8, 68, 77, 85, + 5, 50, 39, 31, 23, 21, 5, 66, 72, 76, 62, 92, 89, 76, 91, 90, 83, 86, 83, + 83, 84, 83, 84, 85, 82, 85, 83, 70, 80, 76, 75, 74, 68, 67, 67, 66, 1, 65, + 68, 72, 12, 10, 14, 7, 3, 13, 10, 6, 9, 9, 7, 5, 1, 67, 70, 66, 7, + 76, 9, 23, 8, 7, 15, 15, 15, 10, 13, 12, 18, 0, 67, 70, 38, 38, 43, 33, + 32, 41, 44, 44, 48, 45, 52, 51, 43, 35, 12, 40, 52, 38, 37, 37, 32, 23, 18, + 15, 6, 67, 69, 76, 87, 35, 34, 35, 29, 19, 22, 16, 9, 8, 5, 0, 71, 71, + 82, 85, 68, 69, 83, 17, 19, 13, 3, 11, 10, 64, 6, 5, 66, 77, 76, 84, 97, + 11, 1, 73, 74, 0, 0, 3, 6, 6, 11, 7, 10, 12, 23, 14, 14, 22, 22, 56, + 45, 37, 29, 22, 13, 2, 74, 97, 4, 36, 29, 27, 21, 20, 10, 7, 6, 67, 79, + 74, 66, 73, 2, 10, 69, 72, 2, 6, 4, 6, 12, 4, 2, 54, 42, 29, 17, 7, + 69, 81, 92, 106, 75, 67, 2, 3, 3, 79, 68}, + + { + + 56, 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 5, 36, 40, 53, 14, 76, 1, 0, + 11, 0, 1, 68, 69, 84, 10, 31, 96, 108, 114, 75, 81, 68, 0, 11, 0, 79, 75, + 9, 8, 65, 74, 83, 2, 74, 80, 90, 5, 72, 82, 4, 73, 77, 88, 4, 68, 69, + 74, 64, 4, 22, 0, 0, 0, 72, 90, 97, 64, 8, 65, 18, 73, 92, 95, 66, 72, + 22, 13, 71, 65, 85, 67, 81, 78, 94, 84, 83, 88, 93, 21, 
0, 8, 82, 70, 81, + 78, 100, 4, 66, 65, 75, 17, 68, 64, 79, 82, 71, 73, 69, 7, 8, 64, 12, 15, + 76, 64, 65, 67, 4, 73, 16, 68, 1, 16, 18, 26, 20, 13, 81, 65, 70, 3, 4, + 92, 0, 8, 5, 5, 7, 7, 15, 9, 3, 2, 1, 2, 9, 8, 82, 67, 1, 73, + 6, 6, 14, 7, 3, 9, 6, 4, 6, 14, 5, 76, 3, 79, 23, 21, 16, 14, 17, + 17, 10, 16, 20, 64, 11, 9, 6, 3, 67, 6, 0, 3, 5, 2, 6, 12, 1, 80, + 1, 74, 1, 8, 82, 28, 37, 29, 28, 27, 26, 24, 24, 18, 10, 7, 4, 2, 65, + 89, 68, 64, 77, 15, 11, 8, 10, 5, 0, 0, 0, 75, 69, 74, 92, 86, 97, 69, + 22, 18, 11, 3, 7, 65, 70, 73, 82, 66, 36, 21, 15, 8, 11, 0, 69, 74, 85, + 4, 38, 29, 24, 18, 15, 5, 64, 70, 78, 70, 39, 27, 14, 3, 8, 68, 77, 84, + 5, 49, 38, 30, 22, 21, 5, 66, 72, 76, 62, 91, 88, 75, 90, 89, 82, 85, 82, + 82, 83, 82, 82, 84, 81, 85, 82, 69, 80, 77, 75, 73, 68, 67, 68, 66, 0, 66, + 69, 73, 11, 10, 14, 7, 2, 12, 10, 6, 8, 8, 7, 5, 0, 67, 70, 66, 7, + 76, 8, 22, 7, 7, 14, 14, 15, 9, 12, 12, 18, 64, 68, 72, 37, 37, 42, 32, + 31, 40, 42, 42, 46, 43, 49, 48, 40, 33, 11, 37, 49, 35, 33, 34, 30, 21, 17, + 14, 6, 66, 69, 75, 85, 33, 32, 33, 27, 17, 20, 14, 7, 7, 3, 64, 73, 72, + 83, 86, 69, 69, 84, 16, 18, 11, 1, 9, 8, 65, 4, 3, 67, 78, 77, 85, 98, + 10, 0, 74, 74, 0, 0, 4, 6, 7, 12, 8, 11, 13, 24, 15, 15, 23, 23, 55, + 43, 35, 27, 19, 10, 64, 77, 99, 4, 36, 29, 27, 21, 21, 10, 7, 6, 67, 79, + 74, 65, 73, 3, 11, 69, 71, 3, 7, 4, 6, 13, 4, 2, 53, 40, 27, 14, 4, + 73, 85, 95, 109, 75, 67, 2, 3, 3, 78, 66}, + + { + + 55, 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 3, 34, 39, 53, 14, 74, 1, 1, + 12, 0, 0, 68, 70, 85, 10, 29, 97, 109, 114, 72, 80, 68, 1, 12, 0, 78, 74, + 10, 8, 65, 73, 82, 1, 75, 80, 90, 5, 72, 82, 4, 73, 77, 88, 5, 68, 69, + 74, 0, 4, 22, 0, 0, 0, 72, 90, 97, 0, 7, 65, 18, 73, 92, 93, 64, 71, + 24, 15, 70, 64, 83, 66, 80, 77, 92, 83, 83, 87, 92, 21, 0, 8, 82, 70, 81, + 77, 98, 4, 67, 66, 76, 17, 68, 64, 78, 82, 70, 73, 68, 7, 8, 64, 12, 14, + 76, 64, 65, 67, 4, 73, 15, 68, 1, 15, 17, 25, 19, 12, 81, 64, 70, 2, 4, + 91, 0, 8, 5, 5, 7, 7, 16, 9, 
3, 2, 2, 2, 8, 8, 83, 67, 1, 73, + 5, 5, 14, 7, 2, 9, 6, 4, 4, 14, 4, 76, 3, 79, 22, 21, 16, 14, 17, + 17, 10, 16, 20, 64, 10, 9, 6, 3, 67, 5, 64, 2, 4, 1, 5, 11, 1, 80, + 0, 74, 0, 6, 82, 27, 35, 27, 26, 25, 24, 22, 22, 16, 8, 6, 3, 0, 67, + 90, 68, 64, 77, 14, 10, 7, 8, 4, 65, 64, 64, 76, 69, 74, 92, 86, 97, 68, + 22, 18, 11, 3, 7, 65, 70, 72, 81, 66, 36, 21, 15, 8, 12, 1, 68, 73, 83, + 4, 38, 29, 24, 18, 16, 5, 64, 70, 78, 70, 39, 27, 13, 3, 8, 68, 77, 83, + 5, 49, 38, 29, 21, 21, 5, 66, 71, 75, 62, 90, 87, 75, 89, 88, 81, 83, 81, + 81, 82, 80, 81, 84, 80, 85, 82, 67, 80, 77, 75, 73, 68, 68, 68, 66, 64, 67, + 70, 74, 11, 9, 14, 6, 2, 12, 9, 6, 7, 8, 6, 5, 64, 67, 71, 66, 7, + 77, 8, 22, 6, 7, 13, 14, 15, 8, 11, 11, 17, 65, 68, 73, 36, 36, 42, 31, + 30, 38, 40, 40, 44, 40, 47, 46, 37, 30, 10, 34, 46, 32, 29, 31, 27, 19, 16, + 13, 6, 65, 69, 74, 83, 31, 30, 31, 25, 15, 18, 13, 5, 5, 2, 65, 74, 74, + 84, 87, 69, 70, 85, 15, 16, 10, 64, 7, 6, 67, 3, 1, 69, 79, 79, 87, 99, + 9, 64, 75, 74, 1, 0, 4, 7, 7, 13, 9, 12, 14, 25, 16, 16, 25, 24, 55, + 42, 33, 24, 17, 7, 67, 80, 101, 5, 37, 30, 28, 21, 22, 11, 7, 7, 66, 79, + 73, 65, 72, 4, 12, 69, 71, 3, 7, 4, 7, 14, 4, 2, 52, 38, 24, 11, 1, + 77, 89, 99, 111, 75, 67, 2, 4, 4, 76, 64}, + + { + + 53, 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 2, 33, 37, 53, 14, 71, 0, 2, + 12, 0, 64, 68, 71, 86, 9, 27, 99, 110, 115, 69, 79, 68, 2, 12, 0, 78, 73, + 10, 7, 65, 73, 82, 1, 75, 79, 90, 5, 72, 81, 3, 74, 77, 88, 5, 67, 69, + 73, 0, 4, 22, 0, 0, 0, 71, 91, 97, 0, 6, 65, 18, 73, 92, 92, 0, 69, + 25, 16, 69, 0, 82, 64, 79, 76, 90, 83, 82, 87, 91, 21, 0, 8, 81, 70, 80, + 77, 97, 3, 67, 66, 76, 17, 68, 64, 78, 82, 70, 73, 68, 7, 7, 64, 12, 14, + 76, 64, 65, 67, 4, 73, 15, 69, 1, 15, 16, 24, 18, 11, 80, 64, 70, 1, 3, + 91, 64, 8, 4, 5, 7, 7, 17, 10, 3, 1, 2, 1, 7, 7, 84, 67, 1, 74, + 5, 4, 13, 6, 2, 10, 6, 4, 3, 13, 3, 76, 3, 80, 22, 21, 16, 14, 17, + 17, 10, 16, 19, 64, 10, 9, 6, 3, 68, 5, 64, 2, 4, 1, 5, 10, 0, 80, + 0, 74, 64, 5, 82, 25, 
33, 25, 24, 23, 22, 20, 20, 14, 6, 4, 1, 66, 69, + 91, 68, 65, 78, 13, 9, 6, 7, 2, 66, 65, 66, 76, 70, 75, 92, 86, 96, 68, + 22, 18, 11, 3, 7, 65, 70, 72, 80, 65, 36, 21, 15, 8, 12, 1, 68, 73, 81, + 4, 38, 29, 24, 18, 16, 5, 64, 70, 77, 70, 39, 26, 13, 3, 8, 68, 77, 83, + 5, 48, 37, 28, 20, 21, 5, 66, 71, 75, 62, 89, 86, 74, 87, 87, 80, 82, 79, + 80, 80, 79, 79, 83, 80, 84, 81, 66, 79, 78, 75, 72, 68, 68, 69, 67, 65, 68, + 71, 75, 10, 9, 14, 6, 1, 11, 9, 6, 6, 7, 6, 4, 65, 68, 71, 66, 7, + 77, 7, 21, 5, 7, 12, 13, 15, 7, 10, 11, 17, 66, 69, 75, 35, 36, 41, 30, + 28, 37, 38, 38, 42, 38, 44, 43, 34, 28, 8, 31, 42, 29, 25, 29, 25, 17, 15, + 11, 6, 64, 68, 73, 81, 30, 28, 29, 24, 13, 16, 11, 3, 4, 0, 67, 76, 75, + 85, 88, 70, 70, 86, 13, 15, 8, 65, 5, 4, 68, 1, 64, 70, 80, 80, 88, 99, + 8, 64, 76, 74, 1, 1, 5, 7, 8, 14, 9, 13, 15, 26, 17, 17, 26, 25, 54, + 40, 31, 22, 14, 4, 70, 83, 104, 5, 37, 30, 28, 22, 22, 11, 8, 7, 66, 78, + 73, 64, 72, 5, 13, 69, 70, 4, 8, 4, 7, 15, 4, 2, 50, 36, 22, 8, 65, + 81, 93, 102, 114, 75, 67, 2, 4, 4, 75, 1}, + + { + + 52, 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 0, 31, 36, 53, 14, 69, 0, + 3, 13, 0, 64, 67, 72, 87, 9, 25, 101, 111, 115, 66, 77, 68, 3, 13, 0, + 78, 72, 10, 7, 65, 73, 81, 1, 76, 79, 90, 5, 72, 80, 3, 74, 77, 88, + 6, 67, 68, 73, 1, 4, 22, 0, 0, 0, 71, 91, 97, 1, 5, 65, 18, 72, + 92, 90, 2, 68, 27, 18, 67, 1, 81, 0, 78, 75, 88, 82, 82, 86, 90, 21, + 0, 8, 81, 70, 80, 76, 95, 3, 67, 66, 76, 17, 68, 0, 77, 82, 70, 73, + 67, 7, 7, 64, 12, 14, 75, 64, 64, 67, 4, 73, 14, 69, 1, 14, 15, 24, + 17, 10, 79, 0, 70, 0, 3, 90, 64, 8, 4, 5, 7, 7, 18, 10, 3, 1, + 2, 1, 6, 7, 85, 67, 1, 74, 4, 3, 13, 6, 1, 10, 6, 4, 2, 13, + 2, 76, 3, 80, 21, 21, 16, 14, 17, 17, 10, 16, 19, 64, 10, 9, 6, 3, + 68, 5, 64, 1, 4, 1, 4, 9, 0, 80, 64, 74, 65, 4, 82, 24, 31, 24, + 23, 21, 20, 18, 18, 12, 4, 2, 64, 68, 70, 92, 68, 65, 78, 12, 8, 5, + 6, 1, 68, 66, 67, 77, 70, 75, 92, 86, 96, 68, 22, 18, 11, 3, 7, 64, + 69, 71, 79, 65, 36, 21, 15, 8, 12, 2, 67, 72, 
79, 4, 38, 29, 24, 18, + 16, 5, 64, 70, 76, 70, 40, 26, 12, 3, 8, 68, 77, 82, 5, 48, 36, 27, + 19, 21, 5, 66, 71, 74, 62, 88, 85, 73, 86, 86, 79, 81, 78, 79, 79, 77, + 77, 82, 79, 84, 81, 65, 79, 78, 74, 71, 68, 69, 69, 67, 65, 69, 72, 76, + 9, 8, 14, 5, 1, 11, 9, 6, 5, 6, 6, 4, 66, 68, 71, 66, 7, 77, + 6, 21, 5, 7, 11, 12, 15, 6, 9, 10, 16, 67, 70, 77, 34, 35, 41, 29, + 27, 35, 36, 36, 40, 35, 41, 41, 31, 26, 7, 28, 39, 26, 21, 26, 22, 15, + 14, 10, 6, 0, 68, 72, 79, 28, 27, 27, 22, 11, 14, 10, 1, 3, 65, 68, + 78, 77, 86, 89, 70, 70, 87, 12, 13, 6, 67, 4, 3, 70, 0, 65, 72, 81, + 81, 89, 100, 8, 65, 77, 73, 2, 1, 5, 8, 9, 15, 10, 14, 16, 28, 18, + 18, 27, 27, 54, 38, 29, 19, 12, 1, 73, 86, 106, 5, 38, 31, 29, 22, 23, + 11, 8, 8, 65, 78, 73, 0, 71, 6, 14, 68, 69, 4, 8, 4, 8, 16, 4, + 2, 49, 34, 19, 5, 68, 84, 97, 106, 117, 75, 67, 2, 4, 4, 73, 3}, + + { + + 51, 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 64, 30, 35, 53, 14, 67, 0, + 3, 14, 0, 65, 67, 73, 88, 8, 24, 102, 112, 116, 0, 76, 68, 3, 14, 0, + 77, 71, 11, 6, 64, 72, 80, 0, 76, 79, 90, 5, 71, 80, 3, 74, 76, 88, + 6, 67, 68, 73, 1, 4, 22, 0, 0, 0, 70, 91, 97, 1, 5, 66, 18, 72, + 91, 88, 4, 66, 28, 20, 66, 2, 79, 1, 77, 75, 87, 82, 82, 86, 90, 22, + 0, 8, 80, 69, 79, 76, 94, 3, 68, 67, 77, 17, 68, 0, 77, 82, 69, 73, + 67, 7, 7, 0, 12, 13, 75, 64, 64, 67, 4, 73, 13, 69, 1, 13, 14, 23, + 17, 10, 79, 0, 70, 0, 2, 90, 65, 8, 4, 5, 7, 7, 18, 10, 3, 1, + 3, 1, 5, 7, 85, 68, 1, 75, 4, 2, 13, 6, 0, 10, 6, 4, 0, 13, + 1, 76, 3, 81, 20, 21, 15, 14, 17, 17, 10, 16, 19, 64, 9, 9, 5, 3, + 68, 4, 65, 1, 3, 0, 3, 8, 64, 80, 64, 73, 66, 2, 82, 22, 29, 22, + 21, 19, 18, 16, 16, 10, 2, 1, 65, 70, 72, 93, 68, 65, 79, 11, 7, 4, + 4, 64, 69, 67, 68, 78, 71, 76, 92, 86, 95, 67, 22, 18, 11, 4, 7, 64, + 69, 71, 78, 64, 35, 21, 15, 8, 13, 2, 66, 71, 77, 4, 39, 29, 24, 18, + 17, 5, 64, 69, 76, 70, 40, 26, 12, 3, 8, 67, 76, 81, 5, 47, 36, 26, + 19, 21, 5, 65, 70, 74, 62, 88, 84, 73, 85, 85, 78, 79, 77, 78, 78, 76, + 76, 82, 78, 84, 80, 0, 79, 
79, 74, 71, 68, 69, 70, 67, 66, 70, 73, 77, + 9, 8, 14, 5, 0, 10, 8, 6, 5, 6, 5, 4, 66, 68, 72, 66, 7, 78, + 6, 20, 4, 6, 10, 12, 16, 6, 8, 10, 16, 67, 70, 78, 33, 34, 40, 28, + 26, 34, 34, 34, 38, 33, 39, 38, 28, 23, 6, 26, 36, 23, 17, 23, 20, 13, + 13, 9, 6, 1, 68, 70, 76, 26, 25, 25, 20, 9, 12, 8, 64, 1, 66, 69, + 79, 78, 87, 90, 71, 71, 88, 11, 12, 5, 69, 2, 1, 71, 65, 67, 73, 83, + 83, 91, 101, 7, 66, 78, 73, 2, 1, 6, 8, 9, 17, 11, 15, 17, 29, 20, + 19, 29, 28, 53, 37, 27, 17, 9, 64, 76, 88, 108, 6, 38, 31, 29, 22, 24, + 12, 8, 8, 65, 78, 72, 0, 71, 7, 15, 68, 69, 5, 9, 4, 8, 17, 4, + 2, 48, 32, 17, 2, 72, 88, 100, 109, 119, 75, 67, 2, 5, 5, 72, 6}, + + { + + 50, 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 66, 28, 33, 53, 14, 64, 64, + 4, 14, 0, 66, 67, 74, 89, 8, 22, 104, 113, 116, 3, 75, 68, 4, 14, 0, + 77, 70, 11, 6, 64, 72, 80, 0, 77, 78, 90, 5, 71, 79, 2, 74, 76, 88, + 7, 66, 68, 72, 2, 4, 22, 0, 0, 0, 70, 91, 97, 2, 4, 66, 18, 72, + 91, 87, 6, 65, 30, 22, 65, 3, 78, 3, 76, 74, 85, 81, 81, 85, 89, 22, + 0, 8, 80, 69, 79, 75, 92, 2, 68, 67, 77, 17, 68, 0, 76, 82, 69, 73, + 66, 7, 6, 0, 12, 13, 75, 64, 64, 67, 4, 73, 13, 69, 1, 13, 13, 22, + 16, 9, 78, 1, 70, 64, 2, 89, 65, 8, 3, 5, 7, 7, 19, 11, 3, 1, + 3, 1, 4, 7, 86, 68, 1, 75, 3, 1, 12, 6, 0, 11, 6, 4, 64, 12, + 0, 76, 3, 81, 20, 21, 15, 14, 17, 17, 10, 16, 19, 64, 9, 9, 5, 3, + 68, 4, 65, 0, 3, 0, 3, 7, 64, 80, 65, 73, 67, 1, 82, 21, 27, 20, + 19, 17, 16, 14, 14, 8, 0, 64, 67, 73, 74, 94, 68, 66, 79, 10, 6, 3, + 3, 65, 71, 68, 69, 78, 71, 76, 92, 86, 95, 67, 22, 18, 11, 4, 7, 64, + 69, 70, 77, 64, 35, 21, 15, 8, 13, 3, 65, 71, 75, 4, 39, 29, 24, 18, + 17, 5, 64, 69, 75, 70, 40, 25, 11, 3, 8, 67, 76, 81, 5, 47, 35, 25, + 18, 21, 5, 65, 70, 73, 62, 87, 83, 72, 83, 84, 77, 78, 75, 77, 76, 74, + 74, 81, 77, 83, 80, 1, 78, 79, 74, 70, 68, 70, 70, 68, 67, 71, 74, 78, + 8, 7, 14, 4, 0, 10, 8, 6, 4, 5, 5, 3, 67, 68, 72, 66, 7, 78, + 5, 20, 3, 6, 9, 11, 16, 5, 7, 9, 15, 68, 71, 80, 32, 34, 40, 27, + 24, 32, 32, 32, 
36, 30, 36, 36, 25, 21, 4, 23, 32, 20, 13, 21, 17, 11, + 12, 8, 6, 2, 67, 69, 74, 25, 23, 23, 19, 7, 10, 7, 66, 0, 68, 71, + 81, 80, 88, 91, 71, 71, 89, 9, 10, 3, 70, 0, 64, 73, 66, 69, 75, 84, + 84, 92, 101, 6, 66, 79, 73, 3, 2, 6, 9, 10, 18, 12, 16, 18, 30, 21, + 20, 30, 29, 53, 35, 25, 14, 7, 67, 79, 91, 110, 6, 39, 32, 30, 23, 25, + 12, 9, 9, 64, 77, 72, 1, 70, 8, 16, 68, 68, 5, 9, 4, 9, 18, 4, + 2, 46, 30, 14, 64, 75, 92, 104, 113, 122, 75, 67, 2, 5, 5, 70, 8}, + + { + + 48, 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 67, 27, 32, 53, 14, 1, 64, + 5, 15, 0, 67, 67, 75, 91, 7, 20, 106, 114, 117, 5, 74, 68, 5, 15, 0, + 77, 69, 11, 5, 64, 72, 79, 64, 77, 78, 91, 5, 71, 79, 2, 75, 76, 88, + 7, 66, 68, 72, 2, 4, 22, 0, 0, 0, 69, 92, 97, 2, 3, 66, 18, 72, + 91, 85, 7, 0, 31, 23, 64, 4, 77, 4, 75, 73, 83, 81, 81, 85, 88, 22, + 0, 8, 79, 69, 78, 75, 91, 2, 69, 68, 78, 17, 68, 0, 76, 82, 69, 73, + 66, 6, 6, 0, 12, 12, 75, 64, 64, 67, 3, 73, 12, 70, 1, 12, 11, 21, + 15, 8, 78, 1, 70, 65, 1, 89, 66, 7, 3, 5, 7, 7, 20, 11, 3, 0, + 3, 0, 3, 6, 87, 68, 1, 76, 3, 0, 12, 5, 64, 11, 6, 4, 66, 12, + 64, 76, 3, 82, 19, 20, 15, 14, 17, 16, 9, 16, 18, 65, 8, 9, 5, 2, + 69, 3, 66, 0, 2, 64, 2, 6, 65, 80, 65, 73, 68, 64, 82, 19, 25, 18, + 17, 15, 13, 12, 12, 6, 65, 66, 69, 75, 76, 95, 68, 66, 80, 9, 4, 1, + 1, 67, 72, 70, 71, 79, 72, 77, 92, 86, 94, 67, 22, 18, 11, 4, 7, 64, + 69, 70, 76, 0, 35, 21, 15, 8, 13, 3, 65, 70, 74, 4, 39, 29, 24, 17, + 17, 5, 64, 69, 75, 70, 40, 25, 11, 3, 8, 67, 76, 80, 5, 46, 34, 24, + 17, 20, 5, 65, 70, 73, 62, 86, 82, 72, 82, 83, 77, 77, 74, 76, 75, 73, + 73, 81, 77, 83, 79, 2, 78, 80, 74, 70, 68, 70, 71, 68, 68, 72, 76, 79, + 7, 7, 14, 4, 64, 9, 7, 5, 3, 4, 4, 3, 68, 69, 73, 66, 7, 79, + 4, 19, 2, 6, 8, 10, 16, 4, 6, 9, 15, 69, 72, 82, 31, 33, 39, 25, + 23, 31, 30, 30, 33, 28, 33, 33, 22, 18, 3, 20, 29, 17, 9, 18, 15, 9, + 10, 6, 6, 2, 67, 68, 72, 23, 21, 21, 17, 4, 8, 5, 68, 65, 70, 72, + 83, 81, 89, 92, 72, 72, 90, 8, 9, 1, 72, 65, 66, 74, 68, 71, 76, 85, + 
86, 94, 102, 5, 67, 80, 73, 3, 2, 7, 9, 10, 19, 12, 17, 19, 31, 22, + 21, 31, 30, 52, 33, 23, 12, 4, 70, 82, 94, 113, 6, 39, 32, 30, 23, 25, + 12, 9, 9, 64, 77, 72, 1, 70, 9, 17, 68, 68, 6, 10, 4, 9, 18, 4, + 1, 45, 28, 12, 67, 78, 96, 108, 116, 125, 75, 68, 1, 5, 5, 69, 10}, + + { + + 47, 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 68, 26, 31, 53, 14, 3, 64, + 6, 16, 0, 67, 66, 76, 92, 6, 18, 107, 115, 118, 8, 72, 68, 6, 16, 0, + 76, 68, 12, 4, 64, 71, 78, 64, 77, 78, 91, 5, 71, 78, 2, 75, 76, 88, + 7, 66, 67, 72, 2, 4, 22, 0, 0, 0, 68, 92, 97, 2, 2, 66, 18, 71, + 91, 83, 9, 2, 32, 25, 1, 5, 75, 5, 73, 72, 81, 81, 81, 85, 87, 22, + 0, 8, 78, 69, 77, 74, 90, 2, 69, 68, 78, 17, 68, 1, 75, 81, 68, 73, + 66, 6, 6, 0, 12, 12, 74, 64, 0, 67, 3, 72, 11, 70, 1, 11, 10, 21, + 14, 7, 77, 1, 69, 66, 0, 89, 67, 7, 3, 6, 7, 7, 21, 11, 3, 0, + 4, 0, 3, 6, 88, 68, 1, 77, 3, 64, 12, 5, 65, 11, 6, 4, 67, 12, + 64, 76, 3, 83, 18, 20, 15, 14, 17, 16, 9, 16, 18, 65, 8, 9, 5, 2, + 69, 3, 66, 0, 2, 64, 1, 6, 65, 80, 65, 73, 69, 65, 82, 17, 24, 17, + 16, 13, 11, 11, 11, 4, 67, 67, 70, 77, 77, 96, 68, 66, 81, 8, 3, 0, + 0, 68, 73, 71, 72, 80, 73, 78, 92, 85, 93, 66, 23, 18, 11, 4, 8, 0, + 68, 69, 75, 1, 35, 21, 15, 8, 14, 3, 64, 69, 72, 4, 39, 29, 24, 17, + 18, 5, 64, 69, 74, 70, 41, 25, 11, 3, 9, 67, 76, 79, 5, 46, 34, 24, + 16, 20, 5, 65, 69, 72, 62, 85, 81, 71, 81, 81, 76, 75, 73, 74, 74, 72, + 71, 80, 76, 83, 78, 4, 78, 81, 73, 69, 68, 70, 72, 68, 68, 73, 77, 80, + 7, 7, 14, 4, 64, 8, 7, 5, 2, 4, 4, 3, 69, 69, 73, 65, 8, 79, + 4, 18, 2, 6, 8, 10, 16, 3, 5, 9, 15, 70, 72, 83, 31, 32, 38, 24, + 22, 30, 28, 28, 31, 26, 31, 30, 20, 16, 2, 17, 26, 14, 5, 15, 13, 8, + 9, 5, 6, 3, 67, 67, 70, 21, 20, 19, 15, 2, 7, 4, 70, 66, 71, 73, + 84, 82, 90, 92, 72, 72, 91, 7, 8, 0, 74, 66, 67, 75, 69, 72, 77, 86, + 87, 95, 103, 5, 68, 80, 72, 3, 2, 8, 10, 11, 20, 13, 18, 20, 33, 23, + 22, 33, 32, 51, 32, 21, 10, 1, 73, 85, 97, 115, 7, 39, 32, 31, 23, 26, + 13, 9, 10, 0, 77, 71, 2, 70, 10, 19, 67, 67, 7, 11, 
4, 10, 19, 4, + 1, 44, 26, 10, 69, 81, 99, 112, 119, 126, 75, 68, 1, 6, 6, 68, 12}, + + { + + 46, 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 70, 24, 29, 53, 14, 6, 65, + 7, 16, 0, 68, 66, 77, 93, 6, 16, 109, 116, 118, 11, 71, 68, 7, 16, 0, + 76, 67, 12, 4, 64, 71, 78, 64, 78, 77, 91, 5, 71, 77, 1, 75, 76, 88, + 8, 65, 67, 71, 3, 4, 22, 0, 0, 0, 68, 92, 97, 3, 1, 66, 18, 71, + 91, 82, 11, 3, 34, 27, 2, 6, 74, 7, 72, 71, 79, 80, 80, 84, 86, 22, + 0, 8, 78, 69, 77, 74, 88, 1, 69, 68, 78, 17, 68, 1, 75, 81, 68, 73, + 65, 6, 5, 0, 12, 12, 74, 64, 0, 67, 3, 72, 11, 70, 1, 11, 9, 20, + 13, 6, 76, 2, 69, 67, 0, 88, 67, 7, 2, 6, 7, 7, 22, 12, 3, 0, + 4, 0, 2, 6, 89, 68, 1, 77, 2, 65, 11, 5, 65, 12, 6, 4, 68, 11, + 65, 76, 3, 83, 18, 20, 15, 14, 17, 16, 9, 16, 18, 65, 8, 9, 5, 2, + 69, 3, 66, 64, 2, 64, 1, 5, 66, 80, 66, 73, 70, 66, 82, 16, 22, 15, + 14, 11, 9, 9, 9, 2, 69, 69, 72, 80, 79, 97, 68, 67, 81, 7, 2, 64, + 64, 70, 75, 72, 73, 80, 73, 78, 92, 85, 93, 66, 23, 18, 11, 4, 8, 0, + 68, 69, 74, 1, 35, 21, 15, 8, 14, 4, 0, 69, 70, 4, 39, 29, 24, 17, + 18, 5, 64, 69, 73, 70, 41, 24, 10, 3, 9, 67, 76, 79, 5, 45, 33, 23, + 15, 20, 5, 65, 69, 72, 62, 84, 80, 70, 79, 80, 75, 74, 71, 73, 72, 70, + 69, 79, 75, 82, 78, 5, 77, 81, 73, 68, 68, 71, 72, 69, 69, 74, 78, 81, + 6, 6, 14, 3, 65, 8, 7, 5, 1, 3, 4, 2, 70, 69, 73, 65, 8, 79, + 3, 18, 1, 6, 7, 9, 16, 2, 4, 8, 14, 71, 73, 85, 30, 32, 38, 23, + 20, 28, 26, 26, 29, 23, 28, 28, 17, 14, 0, 14, 22, 11, 1, 13, 10, 6, + 8, 4, 6, 4, 66, 66, 68, 20, 18, 17, 14, 0, 5, 2, 72, 67, 73, 75, + 86, 84, 91, 93, 73, 72, 92, 5, 6, 65, 75, 68, 69, 77, 71, 74, 79, 87, + 88, 96, 103, 4, 68, 81, 72, 4, 3, 8, 10, 12, 21, 14, 19, 21, 34, 24, + 23, 34, 33, 51, 30, 19, 7, 64, 76, 88, 100, 117, 7, 40, 33, 31, 24, 27, + 13, 10, 10, 0, 76, 71, 3, 69, 11, 20, 67, 66, 7, 11, 4, 10, 20, 4, + 1, 42, 24, 7, 72, 84, 103, 116, 123, 126, 75, 68, 1, 6, 6, 66, 14}, + + { + + 45, 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 71, 23, 28, 53, 14, 8, 65, + 7, 17, 0, 69, 66, 78, 94, 
5, 15, 110, 117, 119, 14, 70, 68, 7, 17, 0, + 75, 66, 13, 3, 0, 70, 77, 65, 78, 77, 91, 5, 70, 77, 1, 75, 75, 88, + 8, 65, 67, 71, 3, 4, 22, 0, 0, 0, 67, 92, 97, 3, 1, 67, 18, 71, + 90, 80, 13, 5, 35, 29, 3, 7, 72, 8, 71, 71, 78, 80, 80, 84, 86, 23, + 0, 8, 77, 68, 76, 73, 87, 1, 70, 69, 79, 17, 68, 1, 74, 81, 67, 73, + 65, 6, 5, 1, 12, 11, 74, 64, 0, 67, 3, 72, 10, 70, 1, 10, 8, 19, + 13, 6, 76, 2, 69, 67, 64, 88, 68, 7, 2, 6, 7, 7, 22, 12, 3, 0, + 5, 0, 1, 6, 89, 69, 1, 78, 2, 66, 11, 5, 66, 12, 6, 4, 70, 11, + 66, 76, 3, 84, 17, 20, 14, 14, 17, 16, 9, 16, 18, 65, 7, 9, 4, 2, + 69, 2, 67, 64, 1, 65, 0, 4, 66, 80, 66, 72, 71, 68, 82, 14, 20, 13, + 12, 9, 7, 7, 7, 0, 71, 70, 73, 82, 81, 98, 68, 67, 82, 6, 1, 65, + 66, 71, 76, 73, 74, 81, 74, 79, 92, 85, 92, 65, 23, 18, 11, 5, 8, 0, + 68, 68, 73, 2, 34, 21, 15, 8, 15, 4, 1, 68, 68, 4, 40, 29, 24, 17, + 19, 5, 64, 68, 73, 70, 41, 24, 10, 3, 9, 66, 75, 78, 5, 45, 33, 22, + 15, 20, 5, 64, 68, 71, 62, 84, 79, 70, 78, 79, 74, 72, 70, 72, 71, 69, + 68, 79, 74, 82, 77, 7, 77, 82, 73, 68, 68, 71, 73, 69, 70, 75, 79, 82, + 6, 6, 14, 3, 65, 7, 6, 5, 1, 3, 3, 2, 70, 69, 74, 65, 8, 80, + 3, 17, 0, 5, 6, 9, 17, 2, 3, 8, 14, 71, 73, 86, 29, 31, 37, 22, + 19, 27, 24, 24, 27, 21, 26, 25, 14, 11, 64, 12, 19, 8, 66, 10, 8, 4, + 7, 3, 6, 5, 66, 64, 65, 18, 16, 15, 12, 65, 3, 1, 74, 69, 74, 76, + 87, 85, 92, 94, 73, 73, 93, 4, 5, 66, 77, 70, 71, 78, 72, 76, 80, 89, + 90, 98, 104, 3, 69, 82, 72, 4, 3, 9, 11, 12, 23, 15, 20, 22, 35, 26, + 24, 36, 34, 50, 29, 17, 5, 67, 78, 91, 102, 119, 8, 40, 33, 32, 24, 28, + 14, 10, 11, 1, 76, 70, 3, 69, 12, 21, 67, 66, 8, 12, 4, 11, 21, 4, + 1, 41, 22, 5, 75, 88, 107, 119, 126, 126, 75, 68, 1, 7, 7, 65, 17}, + + { + + 43, 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 73, 21, 27, 53, 14, 10, 65, + 8, 18, 0, 70, 66, 79, 95, 5, 13, 112, 118, 119, 17, 69, 68, 8, 18, 0, + 75, 65, 13, 3, 0, 70, 76, 65, 79, 77, 91, 5, 70, 76, 1, 76, 75, 88, + 9, 65, 67, 71, 4, 4, 22, 0, 0, 0, 67, 93, 97, 4, 0, 67, 18, 71, + 90, 78, 
14, 6, 37, 30, 4, 8, 71, 9, 70, 70, 76, 79, 80, 83, 85, 23, + 0, 8, 77, 68, 76, 73, 85, 1, 70, 69, 79, 17, 68, 1, 74, 81, 67, 73, + 64, 6, 5, 1, 12, 11, 74, 64, 0, 67, 3, 72, 9, 71, 1, 9, 7, 18, + 12, 5, 75, 3, 69, 68, 64, 87, 68, 7, 2, 6, 7, 7, 23, 12, 3, 64, + 5, 64, 0, 5, 90, 69, 1, 78, 1, 67, 11, 4, 67, 12, 6, 4, 71, 11, + 67, 76, 3, 84, 16, 20, 14, 14, 17, 16, 9, 16, 17, 65, 7, 9, 4, 2, + 70, 2, 67, 65, 1, 65, 64, 3, 67, 80, 67, 72, 72, 69, 82, 13, 18, 11, + 10, 7, 5, 5, 5, 65, 73, 72, 75, 84, 83, 99, 68, 67, 82, 5, 0, 66, + 67, 73, 78, 74, 76, 82, 74, 79, 92, 85, 92, 65, 23, 18, 11, 5, 8, 0, + 68, 68, 72, 2, 34, 21, 15, 8, 15, 5, 1, 67, 66, 4, 40, 29, 24, 17, + 19, 5, 64, 68, 72, 70, 41, 24, 9, 3, 9, 66, 75, 77, 5, 44, 32, 21, + 14, 20, 5, 64, 68, 71, 62, 83, 78, 69, 77, 78, 73, 71, 69, 71, 70, 67, + 66, 78, 74, 82, 77, 8, 77, 82, 73, 67, 68, 72, 73, 69, 71, 76, 80, 83, + 5, 5, 14, 2, 66, 7, 6, 5, 0, 2, 3, 2, 71, 70, 74, 65, 8, 80, + 2, 17, 64, 5, 5, 8, 17, 1, 2, 7, 13, 72, 74, 88, 28, 30, 37, 21, + 18, 25, 22, 22, 25, 18, 23, 23, 11, 9, 65, 9, 16, 5, 70, 7, 5, 2, + 6, 1, 6, 6, 66, 0, 0, 16, 14, 13, 10, 67, 1, 64, 76, 70, 76, 77, + 89, 87, 93, 95, 74, 73, 94, 3, 3, 68, 79, 72, 73, 80, 74, 78, 82, 90, + 91, 99, 105, 2, 70, 83, 72, 5, 3, 9, 11, 13, 24, 15, 21, 23, 36, 27, + 25, 37, 35, 50, 27, 15, 2, 69, 81, 94, 105, 122, 8, 41, 34, 32, 24, 28, + 14, 10, 11, 1, 76, 70, 4, 68, 13, 22, 67, 65, 8, 12, 4, 11, 22, 4, + 1, 40, 20, 2, 78, 91, 111, 123, 126, 126, 75, 68, 1, 7, 7, 0, 19}, + + { + + 42, 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 74, 20, 25, 53, 14, 13, 66, + 9, 18, 0, 70, 65, 80, 96, 4, 11, 114, 119, 120, 20, 67, 68, 9, 18, 0, + 75, 64, 13, 2, 0, 70, 76, 65, 79, 76, 91, 5, 70, 75, 0, 76, 75, 88, + 9, 64, 66, 70, 4, 4, 22, 0, 0, 0, 66, 93, 97, 4, 64, 67, 18, 70, + 90, 77, 16, 8, 38, 32, 6, 9, 70, 11, 69, 69, 74, 79, 79, 83, 84, 23, + 0, 8, 76, 68, 75, 72, 84, 0, 70, 69, 79, 17, 68, 2, 73, 81, 67, 73, + 64, 6, 4, 1, 12, 11, 73, 64, 1, 67, 3, 72, 9, 71, 1, 9, 6, 
18, + 11, 4, 74, 3, 69, 69, 65, 87, 69, 7, 1, 6, 7, 7, 24, 13, 3, 64, + 5, 64, 64, 5, 91, 69, 1, 79, 1, 68, 10, 4, 67, 13, 6, 4, 72, 10, + 68, 76, 3, 85, 16, 20, 14, 14, 17, 16, 9, 16, 17, 65, 7, 9, 4, 2, + 70, 2, 67, 65, 1, 65, 64, 2, 67, 80, 67, 72, 73, 70, 82, 11, 16, 10, + 9, 5, 3, 3, 3, 67, 75, 74, 77, 87, 84, 100, 68, 68, 83, 4, 64, 67, + 68, 74, 79, 75, 77, 82, 75, 80, 92, 85, 91, 65, 23, 18, 11, 5, 8, 1, + 67, 67, 71, 3, 34, 21, 15, 8, 15, 5, 2, 67, 64, 4, 40, 29, 24, 17, + 19, 5, 64, 68, 71, 70, 42, 23, 9, 3, 9, 66, 75, 77, 5, 44, 31, 20, + 13, 20, 5, 64, 68, 70, 62, 82, 77, 68, 75, 77, 72, 70, 67, 70, 68, 66, + 64, 77, 73, 81, 76, 9, 76, 83, 72, 66, 68, 72, 74, 70, 71, 77, 81, 84, + 4, 5, 14, 2, 66, 6, 6, 5, 64, 1, 3, 1, 72, 70, 74, 65, 8, 80, + 1, 16, 64, 5, 4, 7, 17, 0, 1, 7, 13, 73, 75, 90, 27, 30, 36, 20, + 16, 24, 20, 20, 23, 16, 20, 20, 8, 7, 67, 6, 12, 2, 74, 5, 3, 0, + 5, 0, 6, 7, 65, 1, 2, 15, 13, 11, 9, 69, 64, 65, 78, 71, 78, 79, + 91, 88, 94, 96, 74, 73, 95, 1, 2, 70, 80, 73, 74, 81, 75, 79, 83, 91, + 92, 100, 105, 2, 70, 84, 71, 5, 4, 10, 12, 14, 25, 16, 22, 24, 38, 28, + 26, 38, 37, 49, 25, 13, 0, 72, 84, 97, 108, 124, 8, 41, 34, 33, 25, 29, + 14, 11, 12, 2, 75, 70, 5, 68, 14, 23, 66, 64, 9, 13, 4, 12, 23, 4, + 1, 38, 18, 0, 81, 94, 114, 126, 126, 126, 75, 68, 1, 7, 7, 1, 21}, + + { + + 41, 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 76, 18, 24, 53, 14, 15, 66, + 10, 19, 0, 71, 65, 81, 97, 4, 9, 115, 120, 120, 23, 66, 68, 10, 19, 0, + 74, 0, 14, 2, 0, 69, 75, 66, 80, 76, 91, 5, 70, 75, 0, 76, 75, 88, + 10, 64, 66, 70, 5, 4, 22, 0, 0, 0, 66, 93, 97, 5, 65, 67, 18, 70, + 90, 75, 18, 9, 40, 34, 7, 10, 68, 12, 68, 68, 72, 78, 79, 82, 83, 23, + 0, 8, 76, 68, 75, 72, 82, 0, 71, 70, 80, 17, 68, 2, 73, 81, 66, 73, + 0, 6, 4, 1, 12, 10, 73, 64, 1, 67, 3, 72, 8, 71, 1, 8, 5, 17, + 10, 3, 74, 4, 69, 70, 65, 86, 69, 7, 1, 6, 7, 7, 25, 13, 3, 64, + 6, 64, 65, 5, 92, 69, 1, 79, 0, 69, 10, 4, 68, 13, 6, 4, 74, 10, + 69, 76, 3, 85, 15, 20, 14, 14, 17, 16, 9, 16, 
17, 65, 6, 9, 4, 2, + 70, 1, 68, 66, 0, 66, 65, 1, 68, 80, 68, 72, 74, 72, 82, 10, 14, 8, + 7, 3, 1, 1, 1, 69, 77, 75, 78, 89, 86, 101, 68, 68, 83, 3, 65, 68, + 70, 76, 81, 76, 78, 83, 75, 80, 92, 85, 91, 64, 23, 18, 11, 5, 8, 1, + 67, 67, 70, 3, 34, 21, 15, 8, 16, 6, 3, 66, 1, 4, 40, 29, 24, 17, + 20, 5, 64, 68, 71, 70, 42, 23, 8, 3, 9, 66, 75, 76, 5, 43, 31, 19, + 12, 20, 5, 64, 67, 70, 62, 81, 76, 68, 74, 76, 71, 68, 66, 69, 67, 64, + 0, 77, 72, 81, 76, 11, 76, 83, 72, 66, 68, 73, 74, 70, 72, 78, 82, 85, + 4, 4, 14, 1, 67, 6, 5, 5, 65, 1, 2, 1, 73, 70, 75, 65, 8, 81, + 1, 16, 65, 5, 3, 7, 17, 64, 0, 6, 12, 74, 75, 91, 26, 29, 36, 19, + 15, 22, 18, 18, 21, 13, 18, 18, 5, 4, 68, 3, 9, 64, 78, 2, 0, 65, + 4, 64, 6, 8, 65, 2, 4, 13, 11, 9, 7, 71, 66, 67, 80, 73, 79, 80, + 92, 90, 95, 97, 75, 74, 96, 0, 0, 71, 82, 75, 76, 83, 77, 81, 85, 92, + 94, 102, 106, 1, 71, 85, 71, 6, 4, 10, 12, 14, 26, 17, 23, 25, 39, 29, + 27, 40, 38, 49, 24, 11, 66, 74, 87, 100, 111, 126, 9, 42, 35, 33, 25, 30, + 15, 11, 12, 2, 75, 69, 5, 67, 15, 24, 66, 64, 9, 13, 4, 12, 24, 4, + 1, 37, 16, 66, 84, 97, 118, 126, 126, 126, 75, 68, 1, 8, 8, 3, 23}, + + { + + 40, 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 77, 17, 23, 53, 14, 17, 66, + 11, 20, 0, 72, 65, 82, 98, 3, 7, 117, 121, 121, 26, 65, 68, 11, 20, 0, + 74, 1, 14, 1, 0, 69, 74, 66, 80, 76, 91, 5, 70, 74, 0, 76, 75, 88, + 10, 64, 66, 70, 5, 4, 22, 0, 0, 0, 65, 93, 97, 5, 66, 67, 18, 70, + 90, 73, 20, 11, 41, 36, 8, 11, 67, 13, 67, 67, 70, 78, 79, 82, 82, 23, + 0, 8, 75, 68, 74, 71, 81, 0, 71, 70, 80, 17, 68, 2, 72, 81, 66, 73, + 0, 6, 4, 1, 12, 10, 73, 64, 1, 67, 3, 72, 7, 71, 1, 7, 4, 16, + 9, 2, 73, 4, 69, 71, 66, 86, 70, 7, 1, 6, 7, 7, 26, 13, 3, 64, + 6, 64, 66, 5, 93, 69, 1, 80, 0, 70, 10, 4, 69, 13, 6, 4, 75, 10, + 70, 76, 3, 86, 14, 20, 14, 14, 17, 16, 9, 16, 17, 65, 6, 9, 4, 2, + 70, 1, 68, 66, 0, 66, 66, 0, 68, 80, 68, 72, 75, 73, 82, 8, 12, 6, + 5, 1, 64, 64, 64, 71, 79, 77, 80, 91, 88, 102, 68, 68, 84, 2, 66, 69, + 71, 77, 82, 77, 79, 
84, 76, 81, 92, 85, 90, 64, 23, 18, 11, 5, 8, 1, + 67, 66, 69, 4, 34, 21, 15, 8, 16, 6, 4, 65, 3, 4, 40, 29, 24, 17, + 20, 5, 64, 68, 70, 70, 42, 23, 8, 3, 9, 66, 75, 75, 5, 43, 30, 18, + 11, 20, 5, 64, 67, 69, 62, 80, 75, 67, 73, 75, 70, 67, 65, 68, 66, 0, + 2, 76, 71, 81, 75, 12, 76, 84, 72, 65, 68, 73, 75, 70, 73, 79, 83, 86, + 3, 4, 14, 1, 67, 5, 5, 5, 66, 0, 2, 1, 74, 70, 75, 65, 8, 81, + 0, 15, 66, 5, 2, 6, 17, 65, 64, 6, 12, 75, 76, 93, 25, 28, 35, 18, + 14, 21, 16, 16, 19, 11, 15, 15, 2, 2, 69, 0, 6, 67, 82, 64, 65, 67, + 3, 65, 6, 9, 65, 3, 6, 11, 9, 7, 5, 73, 68, 68, 82, 74, 81, 81, + 94, 91, 96, 98, 75, 74, 97, 64, 64, 73, 84, 77, 78, 84, 78, 83, 86, 93, + 95, 103, 107, 0, 72, 86, 71, 6, 4, 11, 13, 15, 27, 18, 24, 26, 40, 30, + 28, 41, 39, 48, 22, 9, 68, 77, 90, 103, 114, 126, 9, 42, 35, 34, 25, 31, + 15, 11, 13, 3, 75, 69, 6, 67, 16, 25, 66, 0, 10, 14, 4, 13, 25, 4, + 1, 36, 14, 68, 87, 100, 122, 126, 126, 126, 75, 68, 1, 8, 8, 4, 25}, + + { + + 38, 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 79, 15, 21, 52, 14, 19, 67, + 11, 20, 64, 73, 65, 84, 100, 2, 5, 119, 122, 122, 28, 64, 69, 11, 20, 64, + 74, 2, 14, 0, 0, 69, 74, 67, 81, 76, 92, 5, 70, 74, 64, 77, 75, 88, + 10, 64, 66, 70, 5, 3, 22, 0, 0, 0, 65, 94, 97, 5, 67, 68, 18, 70, + 90, 72, 21, 12, 42, 37, 9, 12, 66, 14, 66, 67, 69, 78, 79, 82, 82, 23, + 0, 8, 75, 68, 74, 71, 80, 64, 72, 71, 81, 17, 68, 2, 72, 81, 66, 73, + 0, 5, 3, 1, 11, 9, 73, 65, 1, 67, 2, 72, 6, 72, 0, 6, 2, 15, + 8, 1, 73, 4, 69, 72, 67, 86, 71, 6, 0, 6, 7, 7, 26, 13, 3, 65, + 6, 65, 67, 4, 94, 70, 0, 81, 64, 71, 9, 3, 70, 13, 6, 4, 77, 9, + 71, 76, 3, 87, 13, 19, 13, 14, 17, 15, 8, 16, 16, 66, 5, 9, 3, 1, + 71, 0, 69, 67, 64, 67, 67, 64, 69, 80, 69, 72, 76, 75, 82, 6, 10, 4, + 3, 64, 67, 66, 66, 73, 81, 79, 82, 94, 90, 104, 69, 69, 85, 1, 68, 71, + 73, 79, 84, 79, 81, 85, 77, 82, 92, 85, 90, 64, 23, 18, 11, 5, 8, 1, + 67, 66, 68, 4, 33, 21, 15, 8, 16, 6, 4, 65, 4, 3, 40, 29, 23, 16, + 20, 5, 64, 68, 70, 70, 42, 22, 7, 2, 9, 66, 75, 75, 
5, 42, 29, 17, + 10, 19, 5, 64, 67, 69, 62, 80, 74, 67, 72, 74, 70, 66, 64, 67, 65, 1, + 3, 76, 71, 81, 75, 13, 76, 85, 72, 65, 68, 74, 76, 71, 74, 80, 85, 88, + 2, 3, 14, 0, 68, 4, 4, 4, 67, 64, 1, 0, 75, 71, 76, 65, 8, 82, + 64, 14, 67, 4, 1, 5, 17, 66, 66, 5, 11, 76, 77, 95, 24, 27, 34, 16, + 12, 19, 14, 14, 16, 8, 12, 12, 64, 64, 71, 66, 2, 70, 87, 67, 68, 69, + 1, 67, 6, 9, 65, 4, 8, 9, 7, 5, 3, 76, 70, 70, 85, 76, 83, 83, + 96, 93, 97, 99, 76, 75, 99, 66, 66, 75, 86, 79, 80, 86, 80, 85, 88, 95, + 97, 105, 108, 64, 73, 87, 71, 6, 4, 11, 13, 15, 28, 18, 25, 26, 41, 31, + 29, 42, 40, 47, 20, 6, 71, 80, 93, 107, 117, 126, 9, 42, 35, 34, 25, 31, + 15, 11, 13, 3, 75, 69, 6, 67, 17, 26, 66, 0, 10, 14, 4, 13, 25, 4, + 0, 34, 11, 71, 90, 104, 126, 126, 126, 126, 75, 69, 0, 8, 8, 5, 27}, + + { + + 37, 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 80, 14, 20, 52, 14, 22, 67, + 12, 21, 64, 73, 64, 85, 101, 2, 4, 120, 123, 122, 31, 1, 69, 12, 21, 64, + 73, 4, 15, 0, 1, 68, 73, 67, 81, 75, 92, 5, 69, 73, 64, 77, 74, 88, + 11, 0, 65, 69, 6, 3, 22, 0, 0, 0, 64, 94, 97, 6, 67, 68, 18, 69, + 89, 70, 23, 14, 44, 39, 11, 13, 64, 16, 64, 66, 67, 77, 78, 81, 81, 24, + 1, 9, 74, 67, 73, 70, 78, 64, 72, 71, 81, 18, 68, 3, 71, 80, 65, 72, + 1, 5, 3, 2, 11, 9, 72, 65, 2, 67, 2, 71, 6, 72, 0, 6, 1, 15, + 8, 1, 72, 5, 68, 72, 67, 85, 71, 6, 0, 7, 7, 7, 27, 14, 4, 65, + 7, 65, 67, 4, 94, 70, 0, 81, 64, 72, 9, 3, 70, 14, 7, 4, 78, 9, + 71, 75, 3, 87, 13, 19, 13, 14, 17, 15, 8, 16, 16, 66, 5, 9, 3, 1, + 71, 0, 69, 67, 64, 67, 67, 64, 69, 79, 69, 71, 76, 76, 81, 5, 9, 3, + 2, 66, 69, 67, 67, 75, 82, 80, 83, 96, 91, 105, 69, 69, 85, 0, 69, 72, + 74, 80, 85, 80, 82, 85, 77, 82, 91, 84, 89, 0, 24, 18, 11, 6, 9, 2, + 66, 65, 66, 5, 33, 21, 15, 8, 17, 7, 5, 64, 6, 3, 41, 30, 23, 16, + 21, 5, 64, 67, 69, 70, 43, 22, 7, 2, 10, 65, 74, 74, 5, 42, 29, 17, + 10, 19, 5, 0, 66, 68, 62, 79, 73, 66, 70, 72, 69, 64, 1, 65, 0, 3, + 5, 75, 70, 80, 74, 15, 75, 85, 71, 64, 67, 74, 76, 71, 74, 80, 86, 89, + 2, 3, 15, 
0, 68, 4, 4, 4, 67, 64, 1, 0, 75, 71, 76, 64, 9, 82, + 64, 14, 67, 4, 1, 5, 18, 66, 67, 5, 11, 76, 77, 96, 24, 27, 34, 15, + 11, 18, 12, 12, 14, 6, 10, 10, 66, 66, 72, 68, 64, 73, 91, 69, 70, 70, + 0, 68, 6, 10, 64, 6, 11, 8, 6, 4, 2, 78, 71, 71, 87, 77, 84, 84, + 97, 94, 98, 99, 76, 75, 100, 67, 67, 76, 87, 80, 81, 87, 81, 86, 89, 96, + 98, 106, 108, 64, 73, 87, 70, 7, 5, 12, 14, 16, 30, 19, 26, 27, 43, 33, + 30, 44, 42, 47, 19, 4, 73, 82, 95, 110, 119, 126, 10, 43, 36, 35, 26, 32, + 16, 12, 14, 4, 74, 68, 7, 66, 19, 28, 65, 1, 11, 15, 5, 14, 26, 4, + 0, 33, 9, 73, 92, 107, 126, 126, 126, 126, 75, 69, 0, 9, 9, 7, 30}, + + { + + 36, 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 81, 13, 19, 52, 14, 24, + 67, 13, 22, 64, 74, 64, 86, 102, 1, 2, 122, 124, 123, 34, 2, 69, 13, + 22, 64, 73, 5, 15, 64, 1, 68, 72, 67, 81, 75, 92, 5, 69, 72, 64, + 77, 74, 88, 11, 0, 65, 69, 6, 3, 22, 0, 0, 0, 0, 94, 97, 6, + 68, 68, 18, 69, 89, 68, 25, 16, 45, 41, 12, 14, 0, 17, 0, 65, 65, + 77, 78, 81, 80, 24, 1, 9, 73, 67, 72, 70, 77, 64, 72, 71, 81, 18, + 68, 3, 71, 80, 65, 72, 1, 5, 3, 2, 11, 9, 72, 65, 2, 67, 2, + 71, 5, 72, 0, 5, 0, 14, 7, 0, 71, 5, 68, 73, 68, 85, 72, 6, + 0, 7, 7, 7, 28, 14, 4, 65, 7, 65, 68, 4, 95, 70, 0, 82, 64, + 73, 9, 3, 71, 14, 7, 4, 79, 9, 72, 75, 3, 88, 12, 19, 13, 14, + 17, 15, 8, 16, 16, 66, 5, 9, 3, 1, 71, 0, 69, 67, 64, 67, 68, + 65, 70, 79, 69, 71, 77, 77, 81, 3, 7, 1, 0, 68, 71, 69, 69, 77, + 84, 82, 85, 98, 93, 106, 69, 69, 86, 64, 70, 73, 75, 82, 86, 81, 83, + 86, 78, 83, 91, 84, 88, 0, 24, 18, 11, 6, 9, 2, 66, 65, 65, 6, + 33, 21, 15, 8, 17, 7, 6, 0, 8, 3, 41, 30, 23, 16, 21, 5, 64, + 67, 68, 70, 43, 22, 7, 2, 10, 65, 74, 73, 5, 41, 28, 16, 9, 19, + 5, 0, 66, 68, 62, 78, 72, 65, 69, 71, 68, 0, 2, 64, 1, 4, 7, + 74, 69, 80, 73, 16, 75, 86, 71, 0, 67, 74, 77, 71, 75, 81, 87, 90, + 1, 3, 15, 0, 69, 3, 4, 4, 68, 65, 1, 0, 76, 71, 76, 64, 9, + 82, 65, 13, 68, 4, 0, 4, 18, 67, 68, 5, 11, 77, 78, 98, 23, 26, + 33, 14, 10, 17, 10, 10, 12, 4, 7, 7, 69, 68, 73, 71, 
67, 76, 95, + 72, 72, 72, 64, 69, 6, 11, 64, 7, 13, 6, 4, 2, 0, 80, 73, 73, + 89, 78, 86, 85, 99, 95, 99, 100, 77, 75, 101, 68, 68, 78, 89, 82, 83, + 88, 83, 88, 90, 97, 99, 107, 109, 65, 74, 88, 70, 7, 5, 13, 14, 17, + 31, 20, 27, 28, 44, 34, 31, 45, 43, 46, 17, 2, 75, 85, 98, 113, 122, + 126, 10, 43, 36, 35, 26, 33, 16, 12, 14, 4, 74, 68, 8, 66, 20, 29, + 65, 2, 12, 16, 5, 14, 27, 4, 0, 32, 7, 75, 95, 110, 126, 126, 126, + 126, 75, 69, 0, 9, 9, 8, 32}, + + { + + 35, 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 83, 11, 18, 52, 14, 26, + 67, 14, 23, 64, 75, 64, 87, 103, 1, 0, 123, 125, 123, 37, 3, 69, 14, + 23, 64, 72, 6, 16, 64, 1, 67, 71, 68, 82, 75, 92, 5, 69, 72, 64, + 77, 74, 88, 12, 0, 65, 69, 7, 3, 22, 0, 0, 0, 0, 94, 97, 7, + 69, 68, 18, 69, 89, 66, 27, 17, 47, 43, 13, 15, 2, 18, 1, 64, 0, + 76, 78, 80, 79, 24, 1, 9, 73, 67, 72, 69, 75, 64, 73, 72, 82, 18, + 68, 3, 70, 80, 64, 72, 2, 5, 3, 2, 11, 8, 72, 65, 2, 67, 2, + 71, 4, 72, 0, 4, 64, 13, 6, 64, 71, 6, 68, 74, 68, 84, 72, 6, + 0, 7, 7, 7, 29, 14, 4, 65, 8, 65, 69, 4, 96, 70, 0, 82, 65, + 74, 9, 3, 72, 14, 7, 4, 81, 9, 73, 75, 3, 88, 11, 19, 13, 14, + 17, 15, 8, 16, 16, 66, 4, 9, 3, 1, 71, 64, 70, 68, 65, 68, 69, + 66, 70, 79, 70, 71, 78, 79, 81, 2, 5, 64, 65, 70, 73, 71, 71, 79, + 86, 83, 86, 100, 95, 107, 69, 69, 86, 65, 71, 74, 77, 83, 88, 82, 84, + 87, 78, 83, 91, 84, 88, 1, 24, 18, 11, 6, 9, 2, 66, 64, 64, 6, + 33, 21, 15, 8, 18, 8, 7, 1, 10, 3, 41, 30, 23, 16, 22, 5, 64, + 67, 68, 70, 43, 22, 6, 2, 10, 65, 74, 72, 5, 41, 28, 15, 8, 19, + 5, 0, 65, 67, 62, 77, 71, 65, 68, 70, 67, 2, 3, 0, 2, 6, 8, + 74, 68, 80, 73, 18, 75, 86, 71, 0, 67, 75, 77, 71, 76, 82, 88, 91, + 1, 2, 15, 64, 69, 3, 3, 4, 69, 65, 0, 0, 77, 71, 77, 64, 9, + 83, 65, 13, 69, 4, 64, 4, 18, 68, 69, 4, 10, 78, 78, 99, 22, 25, + 33, 13, 9, 15, 8, 8, 10, 1, 5, 5, 72, 71, 74, 74, 70, 79, 99, + 75, 75, 74, 65, 70, 6, 12, 64, 8, 15, 4, 2, 0, 65, 82, 75, 74, + 91, 80, 87, 86, 100, 97, 100, 101, 77, 76, 102, 69, 70, 79, 91, 84, 85, + 90, 84, 90, 
92, 98, 101, 109, 110, 66, 75, 89, 70, 8, 5, 13, 15, 17, + 32, 21, 28, 29, 45, 35, 32, 47, 44, 46, 16, 0, 78, 87, 101, 116, 125, + 126, 11, 44, 37, 36, 26, 34, 17, 12, 15, 5, 74, 67, 8, 65, 21, 30, + 65, 2, 12, 16, 5, 15, 28, 4, 0, 31, 5, 78, 98, 113, 126, 126, 126, + 126, 75, 69, 0, 10, 10, 10, 34}, + + { + + 33, 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 84, 10, 16, 52, 14, 29, + 68, 15, 23, 64, 76, 64, 88, 104, 0, 65, 125, 126, 124, 40, 4, 69, 15, + 23, 64, 72, 7, 16, 65, 1, 67, 71, 68, 82, 74, 92, 5, 69, 71, 65, + 78, 74, 88, 12, 1, 65, 68, 7, 3, 22, 0, 0, 0, 1, 95, 97, 7, + 70, 68, 18, 69, 89, 65, 28, 19, 48, 44, 14, 16, 3, 20, 2, 0, 2, + 76, 77, 80, 78, 24, 1, 9, 72, 67, 71, 69, 74, 65, 73, 72, 82, 18, + 68, 3, 70, 80, 64, 72, 2, 5, 2, 2, 11, 8, 72, 65, 2, 67, 2, + 71, 4, 73, 0, 4, 65, 12, 5, 65, 70, 6, 68, 75, 69, 84, 73, 6, + 64, 7, 7, 7, 30, 15, 4, 66, 8, 66, 70, 3, 97, 70, 0, 83, 65, + 75, 8, 2, 72, 15, 7, 4, 82, 8, 74, 75, 3, 89, 11, 19, 13, 14, + 17, 15, 8, 16, 15, 66, 4, 9, 3, 1, 72, 64, 70, 68, 65, 68, 69, + 67, 71, 79, 70, 71, 79, 80, 81, 0, 3, 66, 67, 72, 75, 73, 73, 81, + 88, 85, 88, 103, 97, 108, 69, 70, 87, 66, 72, 75, 78, 85, 89, 83, 86, + 87, 79, 84, 91, 84, 87, 1, 24, 18, 11, 6, 9, 2, 66, 64, 0, 7, + 33, 21, 15, 8, 18, 8, 7, 1, 12, 3, 41, 30, 23, 16, 22, 5, 64, + 67, 67, 70, 43, 21, 6, 2, 10, 65, 74, 72, 5, 40, 27, 14, 7, 19, + 5, 0, 65, 67, 62, 76, 70, 64, 66, 69, 66, 3, 5, 1, 4, 7, 10, + 73, 68, 79, 72, 19, 74, 87, 71, 1, 67, 75, 78, 72, 77, 83, 89, 92, + 0, 2, 15, 64, 70, 2, 3, 4, 70, 66, 0, 64, 78, 72, 77, 64, 9, + 83, 66, 12, 70, 4, 65, 3, 18, 69, 70, 4, 10, 79, 79, 101, 21, 25, + 32, 12, 7, 14, 6, 6, 8, 64, 2, 2, 75, 73, 76, 77, 74, 82, 103, + 77, 77, 76, 66, 72, 6, 13, 0, 9, 17, 3, 0, 65, 66, 84, 77, 76, + 93, 81, 89, 88, 102, 98, 101, 102, 78, 76, 103, 71, 71, 81, 92, 86, 87, + 91, 86, 92, 93, 99, 102, 110, 110, 67, 75, 90, 70, 8, 6, 14, 15, 18, + 33, 21, 29, 30, 46, 36, 33, 48, 45, 45, 14, 65, 80, 90, 104, 119, 126, + 126, 11, 44, 37, 36, 
27, 34, 17, 13, 15, 5, 73, 67, 9, 65, 22, 31, + 65, 3, 13, 17, 5, 15, 29, 4, 0, 29, 3, 80, 101, 116, 126, 126, 126, + 126, 75, 69, 0, 10, 10, 11, 36}, + + { + + 32, 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 86, 8, 15, 52, 14, 31, + 68, 16, 24, 64, 76, 0, 89, 105, 0, 67, 126, 126, 124, 43, 6, 69, 16, + 24, 64, 72, 8, 16, 65, 1, 67, 70, 68, 83, 74, 92, 5, 69, 70, 65, + 78, 74, 88, 13, 1, 64, 68, 8, 3, 22, 0, 0, 0, 1, 95, 97, 8, + 71, 68, 18, 68, 89, 0, 30, 20, 50, 46, 16, 17, 4, 21, 3, 1, 4, + 75, 77, 79, 77, 24, 1, 9, 72, 67, 71, 68, 72, 65, 73, 72, 82, 18, + 68, 4, 69, 80, 64, 72, 3, 5, 2, 2, 11, 8, 71, 65, 3, 67, 2, + 71, 3, 73, 0, 3, 66, 12, 4, 66, 69, 7, 68, 76, 69, 83, 73, 6, + 64, 7, 7, 7, 31, 15, 4, 66, 8, 66, 71, 3, 98, 70, 0, 83, 66, + 76, 8, 2, 73, 15, 7, 4, 83, 8, 75, 75, 3, 89, 10, 19, 13, 14, + 17, 15, 8, 16, 15, 66, 4, 9, 3, 1, 72, 64, 70, 69, 65, 68, 70, + 68, 71, 79, 71, 71, 80, 81, 81, 64, 1, 67, 68, 74, 77, 75, 75, 83, + 90, 87, 90, 105, 98, 109, 69, 70, 87, 67, 73, 76, 79, 86, 91, 84, 87, + 88, 79, 84, 91, 84, 87, 1, 24, 18, 11, 6, 9, 3, 65, 0, 1, 7, + 33, 21, 15, 8, 18, 9, 8, 2, 14, 3, 41, 30, 23, 16, 22, 5, 64, + 67, 66, 70, 44, 21, 5, 2, 10, 65, 74, 71, 5, 40, 26, 13, 6, 19, + 5, 0, 65, 66, 62, 75, 69, 0, 65, 68, 65, 4, 6, 2, 5, 9, 12, + 72, 67, 79, 72, 20, 74, 87, 70, 2, 67, 76, 78, 72, 77, 84, 90, 93, + 64, 1, 15, 65, 70, 2, 3, 4, 71, 67, 0, 64, 79, 72, 77, 64, 9, + 83, 67, 12, 70, 4, 66, 2, 18, 70, 71, 3, 9, 80, 80, 103, 20, 24, + 32, 11, 6, 12, 4, 4, 6, 67, 64, 0, 78, 75, 77, 80, 77, 85, 107, + 80, 80, 78, 67, 73, 6, 14, 0, 10, 19, 1, 64, 67, 68, 86, 79, 77, + 95, 82, 91, 89, 104, 100, 102, 103, 78, 76, 104, 72, 73, 83, 94, 87, 88, + 93, 87, 93, 95, 100, 103, 111, 111, 67, 76, 91, 69, 9, 6, 14, 16, 19, + 34, 22, 30, 31, 48, 37, 34, 49, 47, 45, 12, 67, 83, 92, 107, 122, 126, + 126, 11, 45, 38, 37, 27, 35, 17, 13, 16, 6, 73, 67, 10, 64, 23, 32, + 64, 4, 13, 17, 5, 16, 30, 4, 0, 28, 1, 83, 104, 119, 126, 126, 126, + 126, 75, 69, 0, 10, 10, 13, 38}, 
+ + { + + 31, 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 87, 7, 14, 52, 14, 33, + 68, 16, 25, 64, 77, 0, 90, 106, 64, 68, 126, 126, 125, 46, 7, 69, 16, + 25, 64, 71, 9, 17, 66, 2, 66, 69, 69, 83, 74, 92, 5, 68, 70, 65, + 78, 73, 88, 13, 1, 64, 68, 8, 3, 22, 0, 0, 0, 2, 95, 97, 8, + 71, 69, 18, 68, 88, 2, 32, 22, 51, 48, 17, 18, 6, 22, 4, 1, 5, + 75, 77, 79, 77, 25, 1, 9, 71, 66, 70, 68, 71, 65, 74, 73, 83, 18, + 68, 4, 69, 80, 0, 72, 3, 5, 2, 3, 11, 7, 71, 65, 3, 67, 2, + 71, 2, 73, 0, 2, 67, 11, 4, 66, 69, 7, 68, 76, 70, 83, 74, 6, + 64, 7, 7, 7, 31, 15, 4, 66, 9, 66, 72, 3, 98, 71, 0, 84, 66, + 77, 8, 2, 74, 15, 7, 4, 85, 8, 76, 75, 3, 90, 9, 19, 12, 14, + 17, 15, 8, 16, 15, 66, 3, 9, 2, 1, 72, 65, 71, 69, 66, 69, 71, + 69, 72, 79, 71, 70, 81, 83, 81, 66, 64, 69, 70, 76, 79, 77, 77, 85, + 92, 88, 91, 107, 100, 110, 69, 70, 88, 68, 74, 77, 81, 88, 92, 85, 88, + 89, 80, 85, 91, 84, 86, 2, 24, 18, 11, 7, 9, 3, 65, 0, 2, 8, + 32, 21, 15, 8, 19, 9, 9, 3, 16, 3, 42, 30, 23, 16, 23, 5, 64, + 66, 66, 70, 44, 21, 5, 2, 10, 64, 73, 70, 5, 39, 26, 12, 6, 19, + 5, 1, 64, 66, 62, 75, 68, 0, 64, 67, 64, 6, 7, 3, 6, 10, 13, + 72, 66, 79, 71, 22, 74, 88, 70, 2, 67, 76, 79, 72, 78, 85, 91, 94, + 64, 1, 15, 65, 71, 1, 2, 4, 71, 67, 64, 64, 79, 72, 78, 64, 9, + 84, 67, 11, 71, 3, 67, 2, 19, 70, 72, 3, 9, 80, 80, 104, 19, 23, + 31, 10, 5, 11, 2, 2, 4, 69, 66, 66, 81, 78, 78, 82, 80, 88, 111, + 83, 82, 80, 68, 74, 6, 15, 0, 12, 22, 64, 66, 69, 70, 88, 81, 79, + 97, 84, 92, 90, 105, 101, 103, 104, 79, 77, 105, 73, 74, 84, 96, 89, 90, + 94, 89, 95, 96, 102, 105, 113, 112, 68, 77, 92, 69, 9, 6, 15, 16, 19, + 36, 23, 31, 32, 49, 39, 35, 51, 48, 44, 11, 69, 85, 95, 109, 125, 126, + 126, 12, 45, 38, 37, 27, 36, 18, 13, 16, 6, 73, 66, 10, 64, 24, 33, + 64, 4, 14, 18, 5, 16, 31, 4, 0, 27, 64, 85, 107, 123, 126, 126, 126, + 126, 75, 69, 0, 11, 11, 14, 41}, + + { + + 30, 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 89, 5, 12, 52, 14, 36, + 69, 17, 25, 64, 78, 0, 91, 107, 64, 70, 126, 126, 125, 49, 8, 69, 
17, + 25, 64, 71, 10, 17, 66, 2, 66, 69, 69, 84, 73, 92, 5, 68, 69, 66, + 78, 73, 88, 14, 2, 64, 67, 9, 3, 22, 0, 0, 0, 2, 95, 97, 9, + 72, 69, 18, 68, 88, 3, 34, 23, 53, 50, 18, 19, 7, 24, 5, 2, 7, + 74, 76, 78, 76, 25, 1, 9, 71, 66, 70, 67, 69, 66, 74, 73, 83, 18, + 68, 4, 68, 80, 0, 72, 4, 5, 1, 3, 11, 7, 71, 65, 3, 67, 2, + 71, 2, 73, 0, 2, 68, 10, 3, 67, 68, 8, 68, 77, 70, 82, 74, 6, + 65, 7, 7, 7, 32, 16, 4, 66, 9, 66, 73, 3, 99, 71, 0, 84, 67, + 78, 7, 2, 74, 16, 7, 4, 86, 7, 77, 75, 3, 90, 9, 19, 12, 14, + 17, 15, 8, 16, 15, 66, 3, 9, 2, 1, 72, 65, 71, 70, 66, 69, 71, + 70, 72, 79, 72, 70, 82, 84, 81, 67, 66, 71, 72, 78, 81, 79, 79, 87, + 94, 90, 93, 110, 102, 111, 69, 71, 88, 69, 75, 78, 82, 89, 94, 86, 89, + 89, 80, 85, 91, 84, 86, 2, 24, 18, 11, 7, 9, 3, 65, 1, 3, 8, + 32, 21, 15, 8, 19, 10, 10, 3, 18, 3, 42, 30, 23, 16, 23, 5, 64, + 66, 65, 70, 44, 20, 4, 2, 10, 64, 73, 70, 5, 39, 25, 11, 5, 19, + 5, 1, 64, 65, 62, 74, 67, 1, 1, 66, 0, 7, 9, 4, 8, 12, 15, + 71, 65, 78, 71, 23, 73, 88, 70, 3, 67, 77, 79, 73, 79, 86, 92, 95, + 65, 0, 15, 66, 71, 1, 2, 4, 72, 68, 64, 65, 80, 72, 78, 64, 9, + 84, 68, 11, 72, 3, 68, 1, 19, 71, 73, 2, 8, 81, 81, 106, 18, 23, + 31, 9, 3, 9, 0, 0, 2, 72, 69, 68, 84, 80, 80, 85, 84, 91, 115, + 85, 85, 82, 69, 75, 6, 16, 1, 13, 24, 65, 68, 71, 71, 90, 83, 80, + 99, 85, 94, 92, 107, 103, 104, 105, 79, 77, 106, 75, 76, 86, 97, 91, 92, + 96, 90, 97, 98, 103, 106, 114, 112, 69, 77, 93, 69, 10, 7, 15, 17, 20, + 37, 24, 32, 33, 50, 40, 36, 52, 49, 44, 9, 71, 88, 97, 112, 126, 126, + 126, 12, 46, 39, 38, 28, 37, 18, 14, 17, 7, 72, 66, 11, 0, 25, 34, + 64, 5, 14, 18, 5, 17, 32, 4, 0, 25, 66, 88, 110, 126, 126, 126, 126, + 126, 75, 69, 0, 11, 11, 16, 43}, + + { + + 28, 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 90, 4, 11, 52, 14, 38, + 69, 18, 26, 64, 79, 0, 92, 109, 65, 72, 126, 126, 126, 51, 9, 69, 18, + 26, 64, 71, 11, 17, 67, 2, 66, 68, 70, 84, 73, 93, 5, 68, 69, 66, + 79, 73, 88, 14, 2, 64, 67, 9, 3, 22, 0, 0, 0, 3, 96, 97, 9, + 73, 69, 
18, 68, 88, 5, 35, 25, 54, 51, 19, 20, 8, 25, 6, 3, 9, + 74, 76, 78, 75, 25, 1, 9, 70, 66, 69, 67, 68, 66, 75, 74, 84, 18, + 68, 4, 68, 80, 0, 72, 4, 4, 1, 3, 11, 6, 71, 65, 3, 67, 1, + 71, 1, 74, 0, 1, 70, 9, 2, 68, 68, 8, 68, 78, 71, 82, 75, 5, + 65, 7, 7, 7, 33, 16, 4, 67, 9, 67, 74, 2, 100, 71, 0, 85, 67, + 79, 7, 1, 75, 16, 7, 4, 88, 7, 78, 75, 3, 91, 8, 18, 12, 14, + 17, 14, 7, 16, 14, 67, 2, 9, 2, 0, 73, 66, 72, 70, 67, 70, 72, + 71, 73, 79, 72, 70, 83, 86, 81, 69, 68, 73, 74, 80, 84, 81, 81, 89, + 96, 92, 95, 112, 104, 112, 69, 71, 89, 70, 77, 80, 84, 91, 95, 88, 91, + 90, 81, 86, 91, 84, 85, 2, 24, 18, 11, 7, 9, 3, 65, 1, 4, 9, + 32, 21, 15, 8, 19, 10, 10, 4, 19, 3, 42, 30, 23, 15, 23, 5, 64, + 66, 65, 70, 44, 20, 4, 2, 10, 64, 73, 69, 5, 38, 24, 10, 4, 18, + 5, 1, 64, 65, 62, 73, 66, 1, 2, 65, 0, 8, 10, 5, 9, 13, 16, + 71, 65, 78, 70, 24, 73, 89, 70, 3, 67, 77, 80, 73, 80, 87, 94, 96, + 66, 0, 15, 66, 72, 0, 1, 3, 73, 69, 65, 65, 81, 73, 79, 64, 9, + 85, 69, 10, 73, 3, 69, 0, 19, 72, 74, 2, 8, 82, 82, 108, 17, 22, + 30, 7, 2, 8, 65, 65, 64, 74, 72, 71, 87, 83, 81, 88, 87, 94, 119, + 88, 87, 84, 71, 77, 6, 16, 1, 14, 26, 67, 70, 73, 73, 93, 85, 82, + 101, 87, 96, 93, 109, 104, 105, 106, 80, 78, 107, 76, 77, 88, 99, 93, 94, + 97, 92, 99, 99, 104, 108, 116, 113, 70, 78, 94, 69, 10, 7, 16, 17, 20, + 38, 24, 33, 34, 51, 41, 37, 53, 50, 43, 7, 73, 90, 100, 115, 126, 126, + 126, 12, 46, 39, 38, 28, 37, 18, 14, 17, 7, 72, 66, 11, 0, 26, 35, + 64, 5, 15, 19, 5, 17, 32, 4, 64, 24, 68, 90, 113, 126, 126, 126, 126, + 126, 75, 70, 64, 11, 11, 17, 45}, + + { + + 27, 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 91, 3, 10, 52, 14, + 40, 69, 19, 27, 64, 79, 1, 93, 110, 66, 74, 126, 126, 126, 54, 11, + 69, 19, 27, 64, 70, 12, 18, 68, 2, 65, 67, 70, 84, 73, 93, 5, + 68, 68, 66, 79, 73, 88, 14, 2, 0, 67, 9, 3, 22, 0, 0, 0, + 4, 96, 97, 9, 74, 69, 18, 67, 88, 7, 37, 27, 55, 53, 21, 21, + 10, 26, 8, 4, 11, 74, 76, 78, 74, 25, 1, 9, 69, 66, 68, 66, + 67, 66, 75, 74, 84, 18, 68, 5, 67, 
79, 1, 72, 4, 4, 1, 3, + 11, 6, 70, 65, 4, 67, 1, 70, 0, 74, 0, 0, 71, 9, 1, 69, + 67, 8, 67, 79, 72, 82, 76, 5, 65, 8, 7, 7, 34, 16, 4, 67, + 10, 67, 74, 2, 101, 71, 0, 86, 67, 80, 7, 1, 76, 16, 7, 4, + 89, 7, 78, 75, 3, 92, 7, 18, 12, 14, 17, 14, 7, 16, 14, 67, + 2, 9, 2, 0, 73, 66, 72, 70, 67, 70, 73, 71, 73, 79, 72, 70, + 84, 87, 81, 71, 69, 74, 75, 82, 86, 82, 82, 91, 98, 93, 96, 114, + 105, 113, 69, 71, 90, 71, 78, 81, 85, 92, 96, 89, 92, 91, 82, 87, + 91, 83, 84, 3, 25, 18, 11, 7, 10, 4, 64, 2, 5, 10, 32, 21, + 15, 8, 20, 10, 11, 5, 21, 3, 42, 30, 23, 15, 24, 5, 64, 66, + 64, 70, 45, 20, 4, 2, 11, 64, 73, 68, 5, 38, 24, 10, 3, 18, + 5, 1, 0, 64, 62, 72, 65, 2, 3, 0, 1, 10, 11, 7, 10, 14, + 18, 70, 64, 78, 69, 26, 73, 90, 69, 4, 67, 77, 81, 73, 80, 88, + 95, 97, 66, 0, 15, 66, 72, 64, 1, 3, 74, 69, 65, 65, 82, 73, + 79, 0, 10, 85, 69, 9, 73, 3, 69, 0, 19, 73, 75, 2, 8, 83, + 82, 109, 17, 21, 29, 6, 1, 7, 67, 67, 66, 76, 74, 74, 89, 85, + 82, 91, 90, 97, 123, 91, 89, 85, 72, 78, 6, 17, 1, 15, 28, 69, + 71, 75, 75, 95, 86, 83, 103, 88, 97, 94, 110, 105, 106, 106, 80, 78, + 108, 77, 78, 89, 101, 94, 95, 98, 93, 100, 100, 105, 109, 117, 114, 70, + 79, 94, 68, 10, 7, 17, 18, 21, 39, 25, 34, 35, 53, 42, 38, 55, + 52, 42, 6, 75, 92, 103, 118, 126, 126, 126, 13, 46, 39, 39, 28, 38, + 19, 14, 18, 8, 72, 65, 12, 0, 27, 37, 0, 6, 16, 20, 5, 18, + 33, 4, 64, 23, 70, 92, 115, 126, 126, 126, 126, 126, 75, 70, 64, 12, + 12, 18, 47}, + + { + + 26, 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 93, 1, 8, 52, 14, + 43, 70, 20, 27, 64, 80, 1, 94, 111, 66, 76, 126, 126, 126, 57, 12, + 69, 20, 27, 64, 70, 13, 18, 68, 2, 65, 67, 70, 85, 72, 93, 5, + 68, 67, 67, 79, 73, 88, 15, 3, 0, 66, 10, 3, 22, 0, 0, 0, + 4, 96, 97, 10, 75, 69, 18, 67, 88, 8, 39, 28, 57, 55, 22, 22, + 11, 28, 9, 5, 13, 73, 75, 77, 73, 25, 1, 9, 69, 66, 68, 66, + 65, 67, 75, 74, 84, 18, 68, 5, 67, 79, 1, 72, 5, 4, 0, 3, + 11, 6, 70, 65, 4, 67, 1, 70, 0, 74, 0, 0, 72, 8, 0, 70, + 66, 9, 67, 80, 72, 81, 76, 5, 66, 8, 7, 7, 
35, 17, 4, 67, + 10, 67, 75, 2, 102, 71, 0, 86, 68, 81, 6, 1, 76, 17, 7, 4, + 90, 6, 79, 75, 3, 92, 7, 18, 12, 14, 17, 14, 7, 16, 14, 67, + 2, 9, 2, 0, 73, 66, 72, 71, 67, 70, 73, 72, 74, 79, 73, 70, + 85, 88, 81, 72, 71, 76, 77, 84, 88, 84, 84, 93, 100, 95, 98, 117, + 107, 114, 69, 72, 90, 72, 79, 82, 86, 94, 98, 90, 93, 91, 82, 87, + 91, 83, 84, 3, 25, 18, 11, 7, 10, 4, 64, 2, 6, 10, 32, 21, + 15, 8, 20, 11, 12, 5, 23, 3, 42, 30, 23, 15, 24, 5, 64, 66, + 0, 70, 45, 19, 3, 2, 11, 64, 73, 68, 5, 37, 23, 9, 2, 18, + 5, 1, 0, 64, 62, 71, 64, 3, 5, 1, 2, 11, 13, 8, 12, 16, + 20, 69, 0, 77, 69, 27, 72, 90, 69, 5, 67, 78, 81, 74, 81, 89, + 96, 98, 67, 64, 15, 67, 73, 64, 1, 3, 75, 70, 65, 66, 83, 73, + 79, 0, 10, 85, 70, 9, 74, 3, 70, 64, 19, 74, 76, 1, 7, 84, + 83, 111, 16, 21, 29, 5, 64, 5, 69, 69, 68, 79, 77, 76, 92, 87, + 84, 94, 94, 100, 126, 93, 92, 87, 73, 79, 6, 18, 2, 16, 30, 70, + 73, 77, 76, 97, 88, 85, 105, 89, 99, 96, 112, 107, 107, 107, 81, 78, + 109, 79, 80, 91, 102, 96, 97, 100, 95, 102, 102, 106, 110, 118, 114, 71, + 79, 95, 68, 11, 8, 17, 18, 22, 40, 26, 35, 36, 54, 43, 39, 56, + 53, 42, 4, 77, 95, 105, 121, 126, 126, 126, 13, 47, 40, 39, 29, 39, + 19, 15, 18, 8, 71, 65, 13, 1, 28, 38, 0, 7, 16, 20, 5, 18, + 34, 4, 64, 21, 72, 95, 118, 126, 126, 126, 126, 126, 75, 70, 64, 12, + 12, 20, 49}, + + { + + 25, 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 94, 0, 7, 52, 14, + 45, 70, 20, 28, 64, 81, 1, 95, 112, 67, 77, 126, 126, 126, 60, 13, + 69, 20, 28, 64, 69, 14, 19, 69, 3, 64, 66, 71, 85, 72, 93, 5, + 67, 67, 67, 79, 72, 88, 15, 3, 0, 66, 10, 3, 22, 0, 0, 0, + 5, 96, 97, 10, 75, 70, 18, 67, 87, 10, 41, 30, 58, 57, 23, 23, + 13, 29, 10, 5, 14, 73, 75, 77, 73, 26, 1, 9, 68, 65, 67, 65, + 64, 67, 76, 75, 85, 18, 68, 5, 66, 79, 2, 72, 5, 4, 0, 4, + 11, 5, 70, 65, 4, 67, 1, 70, 64, 74, 0, 64, 73, 7, 0, 70, + 66, 9, 67, 80, 73, 81, 77, 5, 66, 8, 7, 7, 35, 17, 4, 67, + 11, 67, 76, 2, 102, 72, 0, 87, 68, 82, 6, 1, 77, 17, 7, 4, + 92, 6, 80, 75, 3, 93, 6, 18, 11, 14, 17, 
14, 7, 16, 14, 67, + 1, 9, 1, 0, 73, 67, 73, 71, 68, 71, 74, 73, 74, 79, 73, 69, + 86, 90, 81, 74, 73, 78, 79, 86, 90, 86, 86, 95, 102, 96, 99, 119, + 109, 115, 69, 72, 91, 73, 80, 83, 88, 95, 99, 91, 94, 92, 83, 88, + 91, 83, 83, 4, 25, 18, 11, 8, 10, 4, 64, 3, 7, 11, 31, 21, + 15, 8, 21, 11, 13, 6, 25, 3, 43, 30, 23, 15, 25, 5, 64, 65, + 0, 70, 45, 19, 3, 2, 11, 0, 72, 67, 5, 37, 23, 8, 2, 18, + 5, 2, 1, 0, 62, 71, 0, 3, 6, 2, 3, 13, 14, 9, 13, 17, + 21, 69, 1, 77, 68, 29, 72, 91, 69, 5, 67, 78, 82, 74, 82, 90, + 97, 99, 67, 64, 15, 67, 73, 65, 0, 3, 75, 70, 66, 66, 83, 73, + 80, 0, 10, 86, 70, 8, 75, 2, 71, 64, 20, 74, 77, 1, 7, 84, + 83, 112, 15, 20, 28, 4, 65, 4, 71, 71, 70, 81, 79, 79, 95, 90, + 85, 96, 97, 103, 126, 96, 94, 89, 74, 80, 6, 19, 2, 18, 33, 72, + 75, 79, 78, 99, 90, 86, 107, 91, 100, 97, 113, 108, 108, 108, 81, 79, + 110, 80, 81, 92, 104, 98, 99, 101, 96, 104, 103, 108, 112, 120, 115, 72, + 80, 96, 68, 11, 8, 18, 19, 22, 42, 27, 36, 37, 55, 45, 40, 58, + 54, 41, 3, 79, 97, 108, 123, 126, 126, 126, 14, 47, 40, 40, 29, 40, + 20, 15, 19, 9, 71, 64, 13, 1, 29, 39, 0, 7, 17, 21, 5, 19, + 35, 4, 64, 20, 74, 97, 121, 126, 126, 126, 126, 126, 75, 70, 64, 13, + 13, 21, 52}, + + { + + 23, 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 96, 65, 6, 52, 14, + 47, 70, 21, 29, 64, 82, 1, 96, 113, 67, 79, 126, 126, 126, 62, 14, + 69, 21, 29, 64, 69, 15, 19, 69, 3, 64, 65, 71, 86, 72, 93, 5, + 67, 66, 67, 80, 72, 88, 16, 3, 0, 66, 11, 3, 22, 0, 0, 0, + 5, 97, 97, 11, 76, 70, 18, 67, 87, 12, 42, 31, 60, 58, 24, 24, + 14, 30, 11, 6, 16, 72, 75, 76, 72, 26, 1, 9, 68, 65, 67, 65, + 1, 67, 76, 75, 85, 18, 68, 5, 66, 79, 2, 72, 6, 4, 0, 4, + 11, 5, 70, 65, 4, 67, 1, 70, 65, 75, 0, 65, 74, 6, 64, 71, + 65, 10, 67, 81, 73, 80, 77, 5, 66, 8, 7, 7, 36, 17, 4, 68, + 11, 68, 77, 1, 103, 72, 0, 87, 69, 83, 6, 0, 78, 17, 7, 4, + 93, 6, 81, 75, 3, 93, 5, 18, 11, 14, 17, 14, 7, 16, 13, 67, + 1, 9, 1, 0, 74, 67, 73, 72, 68, 71, 75, 74, 75, 79, 74, 69, + 87, 91, 81, 75, 75, 80, 81, 88, 92, 
88, 88, 97, 104, 98, 101, 121, + 111, 116, 69, 72, 91, 74, 81, 84, 89, 97, 101, 92, 96, 93, 83, 88, + 91, 83, 83, 4, 25, 18, 11, 8, 10, 4, 64, 3, 8, 11, 31, 21, + 15, 8, 21, 12, 13, 7, 27, 3, 43, 30, 23, 15, 25, 5, 64, 65, + 1, 70, 45, 19, 2, 2, 11, 0, 72, 66, 5, 36, 22, 7, 1, 18, + 5, 2, 1, 0, 62, 70, 1, 4, 7, 3, 4, 14, 15, 10, 14, 19, + 23, 68, 1, 77, 68, 30, 72, 91, 69, 6, 67, 79, 82, 74, 83, 91, + 98, 100, 68, 65, 15, 68, 74, 65, 0, 3, 76, 71, 66, 66, 84, 74, + 80, 0, 10, 86, 71, 8, 76, 2, 72, 65, 20, 75, 78, 0, 6, 85, + 84, 114, 14, 19, 28, 3, 66, 2, 73, 73, 72, 84, 82, 81, 98, 92, + 86, 99, 100, 106, 126, 99, 97, 91, 75, 82, 6, 20, 2, 19, 35, 74, + 77, 81, 80, 101, 92, 88, 109, 92, 102, 98, 115, 110, 109, 109, 82, 79, + 111, 81, 83, 94, 106, 100, 101, 103, 98, 106, 105, 109, 113, 121, 116, 73, + 81, 97, 68, 12, 8, 18, 19, 23, 43, 27, 37, 38, 56, 46, 41, 59, + 55, 41, 1, 81, 100, 110, 126, 126, 126, 126, 14, 48, 41, 40, 29, 40, + 20, 15, 19, 9, 71, 64, 14, 2, 30, 40, 0, 8, 17, 21, 5, 19, + 36, 4, 64, 19, 76, 100, 124, 126, 126, 126, 126, 126, 75, 70, 64, 13, + 13, 23, 54}, + + { + + 22, 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 97, 66, 4, 52, 14, + 50, 71, 22, 29, 64, 82, 2, 97, 114, 68, 81, 126, 126, 126, 62, 16, + 69, 22, 29, 64, 69, 16, 19, 70, 3, 64, 65, 71, 86, 71, 93, 5, + 67, 65, 68, 80, 72, 88, 16, 4, 1, 65, 11, 3, 22, 0, 0, 0, + 6, 97, 97, 11, 77, 70, 18, 66, 87, 13, 44, 33, 61, 60, 26, 25, + 15, 32, 12, 7, 18, 72, 74, 76, 71, 26, 1, 9, 67, 65, 66, 64, + 2, 68, 76, 75, 85, 18, 68, 6, 65, 79, 2, 72, 6, 4, 64, 4, + 11, 5, 69, 65, 5, 67, 1, 70, 65, 75, 0, 65, 75, 6, 65, 72, + 64, 10, 67, 82, 74, 80, 78, 5, 67, 8, 7, 7, 37, 18, 4, 68, + 11, 68, 78, 1, 104, 72, 0, 88, 69, 84, 5, 0, 78, 18, 7, 4, + 94, 5, 82, 75, 3, 94, 5, 18, 11, 14, 17, 14, 7, 16, 13, 67, + 1, 9, 1, 0, 74, 67, 73, 72, 68, 71, 75, 75, 75, 79, 74, 69, + 88, 92, 81, 77, 77, 81, 82, 90, 94, 90, 90, 99, 106, 100, 103, 124, + 112, 117, 69, 73, 92, 75, 82, 85, 90, 98, 102, 93, 97, 93, 84, 89, + 91, 
83, 82, 4, 25, 18, 11, 8, 10, 5, 0, 4, 9, 12, 31, 21, + 15, 8, 21, 12, 14, 7, 29, 3, 43, 30, 23, 15, 25, 5, 64, 65, + 2, 70, 46, 18, 2, 2, 11, 0, 72, 66, 5, 36, 21, 6, 0, 18, + 5, 2, 1, 1, 62, 69, 2, 5, 9, 4, 5, 15, 17, 11, 16, 20, + 25, 67, 2, 76, 67, 31, 71, 92, 68, 7, 67, 79, 83, 75, 83, 92, + 99, 101, 69, 65, 15, 68, 74, 66, 0, 3, 77, 72, 66, 67, 85, 74, + 80, 0, 10, 86, 72, 7, 76, 2, 73, 66, 20, 76, 79, 0, 6, 86, + 85, 116, 13, 19, 27, 2, 68, 1, 75, 75, 74, 86, 85, 84, 101, 94, + 88, 102, 104, 109, 126, 101, 99, 93, 76, 83, 6, 21, 3, 20, 37, 75, + 78, 83, 81, 103, 94, 89, 111, 93, 104, 100, 117, 111, 110, 110, 82, 79, + 112, 83, 84, 96, 107, 101, 102, 104, 99, 107, 106, 110, 114, 122, 116, 73, + 81, 98, 67, 12, 9, 19, 20, 24, 44, 28, 38, 39, 58, 47, 42, 60, + 57, 40, 64, 83, 102, 113, 126, 126, 126, 126, 14, 48, 41, 41, 30, 41, + 20, 16, 20, 10, 70, 64, 15, 2, 31, 41, 1, 9, 18, 22, 5, 20, + 37, 4, 64, 17, 78, 102, 126, 126, 126, 126, 126, 126, 75, 70, 64, 13, + 13, 24, 56}, + + { + + 21, 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 99, 68, 3, 52, 14, + 52, 71, 23, 30, 64, 83, 2, 98, 115, 68, 83, 126, 126, 126, 62, 17, + 69, 23, 30, 64, 68, 17, 20, 70, 3, 0, 64, 72, 87, 71, 93, 5, + 67, 65, 68, 80, 72, 88, 17, 4, 1, 65, 12, 3, 22, 0, 0, 0, + 6, 97, 97, 12, 78, 70, 18, 66, 87, 15, 46, 34, 62, 62, 27, 26, + 17, 33, 13, 8, 20, 71, 74, 75, 70, 26, 1, 9, 67, 65, 66, 64, + 4, 68, 77, 76, 86, 18, 68, 6, 65, 79, 3, 72, 7, 4, 64, 4, + 11, 4, 69, 65, 5, 67, 1, 70, 66, 75, 0, 66, 76, 5, 66, 73, + 64, 11, 67, 83, 74, 79, 78, 5, 67, 8, 7, 7, 38, 18, 4, 68, + 12, 68, 79, 1, 105, 72, 0, 88, 70, 85, 5, 0, 79, 18, 7, 4, + 96, 5, 83, 75, 3, 94, 4, 18, 11, 14, 17, 14, 7, 16, 13, 67, + 0, 9, 1, 0, 74, 68, 74, 73, 69, 72, 76, 76, 76, 79, 75, 69, + 89, 94, 81, 78, 79, 83, 84, 92, 96, 92, 92, 101, 108, 101, 104, 126, + 114, 118, 69, 73, 92, 76, 83, 86, 92, 100, 104, 94, 98, 94, 84, 89, + 91, 83, 82, 5, 25, 18, 11, 8, 10, 5, 0, 4, 10, 12, 31, 21, + 15, 8, 22, 13, 15, 8, 31, 3, 43, 30, 23, 
15, 26, 5, 64, 65, + 2, 70, 46, 18, 1, 2, 11, 0, 72, 65, 5, 35, 21, 5, 64, 18, + 5, 2, 2, 1, 62, 68, 3, 5, 10, 5, 6, 17, 18, 12, 17, 22, + 26, 67, 3, 76, 67, 33, 71, 92, 68, 7, 67, 80, 83, 75, 84, 93, + 100, 102, 69, 66, 15, 69, 75, 66, 64, 3, 78, 72, 67, 67, 86, 74, + 81, 0, 10, 87, 72, 7, 77, 2, 74, 66, 20, 77, 80, 64, 5, 87, + 85, 117, 12, 18, 27, 1, 69, 64, 77, 77, 76, 89, 87, 86, 104, 97, + 89, 105, 107, 112, 126, 104, 102, 95, 77, 84, 6, 22, 3, 21, 39, 77, + 80, 85, 83, 105, 96, 91, 113, 95, 105, 101, 118, 113, 111, 111, 83, 80, + 113, 84, 86, 97, 109, 103, 104, 106, 101, 109, 108, 111, 116, 124, 117, 74, + 82, 99, 67, 13, 9, 19, 20, 24, 45, 29, 39, 40, 59, 48, 43, 62, + 58, 40, 65, 85, 105, 115, 126, 126, 126, 126, 15, 49, 42, 41, 30, 42, + 21, 16, 20, 10, 70, 0, 15, 3, 32, 42, 1, 9, 18, 22, 5, 20, + 38, 4, 64, 16, 80, 105, 126, 126, 126, 126, 126, 126, 75, 70, 64, 14, + 14, 26, 58}, + + { + + 20, 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 100, 69, 2, 52, 14, + 54, 71, 24, 31, 64, 84, 2, 99, 116, 69, 85, 126, 126, 126, 62, 18, + 69, 24, 31, 64, 68, 18, 20, 71, 3, 0, 0, 72, 87, 71, 93, 5, + 67, 64, 68, 80, 72, 88, 17, 4, 1, 65, 12, 3, 22, 0, 0, 0, + 7, 97, 97, 12, 79, 70, 18, 66, 87, 17, 48, 36, 62, 62, 28, 27, + 18, 34, 14, 9, 22, 71, 74, 75, 69, 26, 1, 9, 66, 65, 65, 0, + 5, 68, 77, 76, 86, 18, 68, 6, 64, 79, 3, 72, 7, 4, 64, 4, + 11, 4, 69, 65, 5, 67, 1, 70, 67, 75, 0, 67, 77, 4, 67, 74, + 0, 11, 67, 84, 75, 79, 79, 5, 67, 8, 7, 7, 39, 18, 4, 68, + 12, 68, 80, 1, 106, 72, 0, 89, 70, 86, 5, 0, 80, 18, 7, 4, + 97, 5, 84, 75, 3, 95, 3, 18, 11, 14, 17, 14, 7, 16, 13, 67, + 0, 9, 1, 0, 74, 68, 74, 73, 69, 72, 77, 77, 76, 79, 75, 69, + 90, 95, 81, 80, 81, 85, 86, 94, 98, 94, 94, 103, 110, 103, 106, 126, + 116, 119, 69, 73, 93, 77, 84, 87, 93, 101, 105, 95, 99, 95, 85, 90, + 91, 83, 81, 5, 25, 18, 11, 8, 10, 5, 0, 5, 11, 13, 31, 21, + 15, 8, 22, 13, 16, 9, 33, 3, 43, 30, 23, 15, 26, 5, 64, 65, + 3, 70, 46, 18, 1, 2, 11, 0, 72, 64, 5, 35, 20, 4, 65, 18, + 5, 2, 2, 2, 
62, 67, 4, 6, 11, 6, 7, 18, 19, 13, 18, 23, + 28, 66, 4, 76, 66, 34, 71, 93, 68, 8, 67, 80, 84, 75, 85, 94, + 101, 103, 70, 66, 15, 69, 75, 67, 64, 3, 79, 73, 67, 67, 87, 74, + 81, 0, 10, 87, 73, 6, 78, 2, 75, 67, 20, 78, 81, 64, 5, 88, + 86, 119, 11, 17, 26, 0, 70, 65, 79, 79, 78, 91, 90, 89, 107, 99, + 90, 108, 110, 115, 126, 107, 104, 97, 78, 85, 6, 23, 3, 22, 41, 79, + 82, 87, 85, 107, 98, 92, 115, 96, 107, 102, 120, 114, 112, 112, 83, 80, + 114, 85, 87, 99, 111, 105, 106, 107, 102, 111, 109, 112, 117, 125, 118, 75, + 83, 100, 67, 13, 9, 20, 21, 25, 46, 30, 40, 41, 60, 49, 44, 62, + 59, 39, 67, 87, 107, 118, 126, 126, 126, 126, 15, 49, 42, 42, 30, 43, + 21, 16, 21, 11, 70, 0, 16, 3, 33, 43, 1, 10, 19, 23, 5, 21, + 39, 4, 64, 15, 82, 107, 126, 126, 126, 126, 126, 126, 75, 70, 64, 14, + 14, 27, 60}, + + { + + 18, 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 102, 71, 0, 51, 14, + 56, 72, 24, 31, 65, 85, 2, 101, 118, 70, 87, 126, 126, 126, 62, 19, + 70, 24, 31, 65, 68, 19, 20, 72, 3, 0, 0, 73, 88, 71, 94, 5, + 67, 64, 69, 81, 72, 88, 17, 4, 1, 65, 12, 2, 22, 0, 0, 0, + 7, 98, 97, 12, 80, 71, 18, 66, 87, 18, 49, 37, 62, 62, 29, 28, + 19, 35, 15, 9, 23, 71, 74, 75, 69, 26, 1, 9, 66, 65, 65, 0, + 6, 69, 78, 77, 87, 18, 68, 6, 64, 79, 3, 72, 7, 3, 65, 4, + 10, 3, 69, 66, 5, 67, 0, 70, 68, 76, 64, 68, 79, 3, 68, 75, + 0, 11, 67, 85, 76, 79, 80, 4, 68, 8, 7, 7, 39, 18, 4, 69, + 12, 69, 81, 0, 107, 73, 64, 90, 71, 87, 4, 64, 81, 18, 7, 4, + 99, 4, 85, 75, 3, 96, 2, 17, 10, 14, 17, 13, 6, 16, 12, 68, + 64, 9, 0, 64, 75, 69, 75, 74, 70, 73, 78, 78, 77, 79, 76, 69, + 91, 97, 81, 82, 83, 87, 88, 96, 101, 96, 96, 105, 112, 105, 108, 126, + 118, 121, 70, 74, 94, 78, 86, 89, 95, 103, 107, 97, 101, 96, 86, 91, + 91, 83, 81, 5, 25, 18, 11, 8, 10, 5, 0, 5, 12, 13, 30, 21, + 15, 8, 22, 13, 16, 9, 34, 2, 43, 30, 22, 14, 26, 5, 64, 65, + 3, 70, 46, 17, 0, 1, 11, 0, 72, 64, 5, 34, 19, 3, 66, 17, + 5, 2, 2, 2, 62, 67, 5, 6, 12, 7, 7, 19, 20, 14, 19, 24, + 29, 66, 4, 76, 66, 35, 71, 94, 68, 8, 
67, 81, 85, 76, 86, 95, + 103, 105, 71, 67, 15, 70, 76, 68, 65, 2, 80, 74, 68, 68, 88, 75, + 82, 0, 10, 88, 74, 5, 79, 1, 76, 68, 20, 79, 83, 65, 4, 89, + 87, 121, 10, 16, 25, 65, 72, 67, 81, 81, 81, 94, 93, 92, 110, 102, + 92, 111, 114, 118, 126, 110, 107, 99, 80, 87, 6, 23, 3, 23, 43, 81, + 84, 89, 87, 110, 100, 94, 118, 98, 109, 104, 122, 116, 113, 113, 84, 81, + 116, 87, 89, 101, 113, 107, 108, 109, 104, 113, 111, 114, 119, 126, 119, 76, + 84, 101, 67, 13, 9, 20, 21, 25, 47, 30, 41, 41, 61, 50, 45, 62, + 60, 38, 69, 90, 110, 121, 126, 126, 126, 126, 15, 49, 42, 42, 30, 43, + 21, 16, 21, 11, 70, 0, 16, 3, 34, 44, 1, 10, 19, 23, 5, 21, + 39, 4, 65, 13, 85, 110, 126, 126, 126, 126, 126, 126, 75, 71, 65, 14, + 14, 28, 62}, + + { + + 17, 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 103, 72, 64, 51, 14, + 59, 72, 25, 32, 65, 85, 3, 102, 119, 70, 88, 126, 126, 126, 62, 21, + 70, 25, 32, 65, 67, 21, 21, 72, 4, 1, 1, 73, 88, 70, 94, 5, + 66, 0, 69, 81, 71, 88, 18, 5, 2, 64, 13, 2, 22, 0, 0, 0, + 8, 98, 97, 13, 80, 71, 18, 65, 86, 20, 51, 39, 62, 62, 31, 29, + 21, 37, 17, 10, 25, 70, 73, 74, 68, 27, 2, 10, 65, 64, 64, 1, + 8, 69, 78, 77, 87, 19, 68, 7, 0, 78, 4, 71, 8, 3, 65, 5, + 10, 3, 68, 66, 6, 67, 0, 69, 68, 76, 64, 68, 80, 3, 68, 75, + 1, 12, 66, 85, 76, 78, 80, 4, 68, 9, 7, 7, 40, 19, 5, 69, + 13, 69, 81, 0, 107, 73, 64, 90, 71, 88, 4, 64, 81, 19, 8, 4, + 100, 4, 85, 74, 3, 96, 2, 17, 10, 14, 17, 13, 6, 16, 12, 68, + 64, 9, 0, 64, 75, 69, 75, 74, 70, 73, 78, 78, 77, 78, 76, 68, + 91, 98, 80, 83, 84, 88, 89, 98, 103, 97, 97, 107, 113, 106, 109, 126, + 119, 122, 70, 74, 94, 79, 87, 90, 96, 104, 108, 98, 102, 96, 86, 91, + 90, 82, 80, 6, 26, 18, 11, 9, 11, 6, 1, 6, 14, 14, 30, 21, + 15, 8, 23, 14, 17, 10, 36, 2, 44, 31, 22, 14, 27, 5, 64, 64, + 4, 70, 47, 17, 0, 1, 12, 1, 71, 0, 5, 34, 19, 3, 66, 17, + 5, 3, 3, 3, 62, 66, 6, 7, 14, 9, 8, 21, 22, 16, 21, 26, + 31, 65, 5, 75, 65, 37, 70, 94, 67, 9, 66, 81, 85, 76, 86, 95, + 104, 106, 71, 67, 16, 70, 76, 68, 65, 2, 80, 74, 68, 
68, 88, 75, + 82, 1, 11, 88, 74, 5, 79, 1, 76, 68, 21, 79, 84, 65, 4, 89, + 87, 122, 10, 16, 25, 66, 73, 68, 83, 83, 83, 96, 95, 94, 112, 104, + 93, 113, 117, 121, 126, 112, 109, 100, 81, 88, 6, 24, 4, 25, 46, 82, + 85, 90, 88, 112, 101, 95, 120, 99, 110, 105, 123, 117, 114, 113, 84, 81, + 117, 88, 90, 102, 114, 108, 109, 110, 105, 114, 112, 115, 120, 126, 119, 76, + 84, 101, 66, 14, 10, 21, 22, 26, 49, 31, 42, 42, 62, 52, 46, 62, + 62, 38, 70, 92, 112, 123, 126, 126, 126, 126, 16, 50, 43, 43, 31, 44, + 22, 17, 22, 12, 69, 1, 17, 4, 36, 46, 2, 11, 20, 24, 6, 22, + 40, 4, 65, 12, 87, 112, 126, 126, 126, 126, 126, 126, 75, 71, 65, 15, + 15, 30, 62}, + + { + + 16, 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 104, 73, 65, 51, 14, + 61, 72, 26, 33, 65, 86, 3, 103, 120, 71, 90, 126, 126, 126, 62, 22, + 70, 26, 33, 65, 67, 22, 21, 73, 4, 1, 2, 73, 88, 70, 94, 5, + 66, 1, 69, 81, 71, 88, 18, 5, 2, 64, 13, 2, 22, 0, 0, 0, + 9, 98, 97, 13, 81, 71, 18, 65, 86, 22, 53, 41, 62, 62, 32, 30, + 22, 38, 18, 11, 27, 70, 73, 74, 67, 27, 2, 10, 64, 64, 0, 1, + 9, 69, 78, 77, 87, 19, 68, 7, 0, 78, 4, 71, 8, 3, 65, 5, + 10, 3, 68, 66, 6, 67, 0, 69, 69, 76, 64, 69, 81, 2, 69, 76, + 2, 12, 66, 86, 77, 78, 81, 4, 68, 9, 7, 7, 41, 19, 5, 69, + 13, 69, 82, 0, 108, 73, 64, 91, 71, 89, 4, 64, 82, 19, 8, 4, + 101, 4, 86, 74, 3, 97, 1, 17, 10, 14, 17, 13, 6, 16, 12, 68, + 64, 9, 0, 64, 75, 69, 75, 74, 70, 73, 79, 79, 78, 78, 76, 68, + 92, 99, 80, 85, 86, 90, 91, 100, 105, 99, 99, 109, 115, 108, 111, 126, + 121, 123, 70, 74, 95, 80, 88, 91, 97, 106, 109, 99, 103, 97, 87, 92, + 90, 82, 79, 6, 26, 18, 11, 9, 11, 6, 1, 6, 15, 15, 30, 21, + 15, 8, 23, 14, 18, 11, 38, 2, 44, 31, 22, 14, 27, 5, 64, 64, + 5, 70, 47, 17, 0, 1, 12, 1, 71, 1, 5, 33, 18, 2, 67, 17, + 5, 3, 3, 3, 62, 65, 7, 8, 15, 10, 9, 22, 23, 17, 22, 27, + 33, 64, 6, 75, 64, 38, 70, 95, 67, 10, 66, 81, 86, 76, 87, 96, + 105, 107, 72, 67, 16, 70, 77, 69, 65, 2, 81, 75, 68, 68, 89, 75, + 82, 1, 11, 88, 75, 4, 80, 1, 77, 69, 21, 80, 85, 65, 4, 90, 
+ 88, 124, 9, 15, 24, 67, 74, 69, 85, 85, 85, 98, 98, 97, 115, 106, + 94, 116, 120, 124, 126, 115, 111, 102, 82, 89, 6, 25, 4, 26, 48, 84, + 87, 92, 90, 114, 103, 97, 122, 100, 112, 106, 125, 118, 115, 114, 85, 81, + 118, 89, 91, 104, 116, 110, 111, 111, 107, 116, 113, 116, 121, 126, 120, 77, + 85, 102, 66, 14, 10, 22, 22, 27, 50, 32, 43, 43, 62, 53, 47, 62, + 62, 37, 72, 94, 114, 126, 126, 126, 126, 126, 16, 50, 43, 43, 31, 45, + 22, 17, 22, 12, 69, 1, 18, 4, 37, 47, 2, 12, 21, 25, 6, 22, + 41, 4, 65, 11, 89, 114, 126, 126, 126, 126, 126, 126, 75, 71, 65, 15, + 15, 31, 62}, + + { + + 15, 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 106, 75, 66, 51, 14, + 62, 72, 27, 34, 65, 87, 3, 104, 121, 71, 92, 126, 126, 126, 62, 23, + 70, 27, 34, 65, 66, 23, 22, 73, 4, 2, 3, 74, 89, 70, 94, 5, + 66, 1, 69, 81, 71, 88, 19, 5, 2, 64, 14, 2, 22, 0, 0, 0, + 9, 98, 97, 14, 82, 71, 18, 65, 86, 24, 55, 42, 62, 62, 33, 31, + 24, 39, 19, 12, 29, 69, 73, 73, 66, 27, 2, 10, 64, 64, 0, 2, + 11, 69, 79, 78, 88, 19, 68, 7, 1, 78, 5, 71, 9, 3, 65, 5, + 10, 2, 68, 66, 6, 67, 0, 69, 70, 76, 64, 70, 82, 1, 70, 77, + 2, 13, 66, 87, 77, 77, 81, 4, 68, 9, 7, 7, 42, 19, 5, 69, + 14, 69, 83, 0, 109, 73, 64, 91, 72, 90, 4, 64, 83, 19, 8, 4, + 103, 4, 87, 74, 3, 97, 0, 17, 10, 14, 17, 13, 6, 16, 12, 68, + 65, 9, 0, 64, 75, 70, 76, 75, 71, 74, 80, 80, 78, 78, 77, 68, + 93, 101, 80, 86, 88, 92, 93, 102, 107, 101, 101, 111, 117, 109, 112, 126, + 123, 124, 70, 74, 95, 81, 89, 92, 99, 107, 111, 100, 104, 98, 87, 92, + 90, 82, 79, 7, 26, 18, 11, 9, 11, 6, 1, 7, 16, 15, 30, 21, + 15, 8, 24, 15, 19, 12, 40, 2, 44, 31, 22, 14, 28, 5, 64, 64, + 5, 70, 47, 17, 64, 1, 12, 1, 71, 2, 5, 33, 18, 1, 68, 17, + 5, 3, 4, 4, 62, 64, 8, 8, 16, 11, 10, 24, 24, 18, 23, 29, + 34, 64, 7, 75, 64, 40, 70, 95, 67, 10, 66, 82, 86, 76, 88, 97, + 106, 108, 72, 68, 16, 71, 77, 69, 66, 2, 82, 75, 69, 68, 90, 75, + 83, 1, 11, 89, 75, 4, 81, 1, 78, 69, 21, 81, 86, 66, 3, 91, + 88, 125, 8, 14, 24, 68, 75, 71, 87, 87, 87, 101, 100, 99, 118, 
109, + 95, 119, 123, 126, 126, 118, 114, 104, 83, 90, 6, 26, 4, 27, 50, 86, + 89, 94, 92, 116, 105, 98, 124, 102, 113, 107, 126, 120, 116, 115, 85, 82, + 119, 90, 93, 105, 118, 112, 113, 113, 108, 118, 115, 117, 123, 126, 121, 78, + 86, 103, 66, 15, 10, 22, 23, 27, 51, 33, 44, 44, 62, 54, 48, 62, + 62, 37, 73, 96, 117, 126, 126, 126, 126, 126, 17, 51, 44, 44, 31, 46, + 23, 17, 23, 13, 69, 2, 18, 5, 38, 48, 2, 12, 21, 25, 6, 23, + 42, 4, 65, 10, 91, 117, 126, 126, 126, 126, 126, 126, 75, 71, 65, 16, + 16, 33, 62}, + + }, + + { + + { + + 62, 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 38, 62, 62, 54, 22, + 118, 65, 71, 79, 11, 13, 70, 9, 29, 41, 62, 61, 27, 69, 126, 101, + 76, 71, 79, 11, 69, 90, 11, 20, 69, 82, 96, 4, 75, 87, 100, 7, + 74, 85, 4, 81, 86, 95, 66, 77, 70, 86, 72, 2, 22, 0, 0, 0, + 83, 86, 97, 72, 22, 1, 48, 12, 80, 126, 91, 96, 81, 98, 102, 97, + 119, 99, 110, 102, 126, 80, 89, 94, 92, 24, 65, 84, 126, 73, 104, 91, + 126, 8, 7, 8, 2, 10, 68, 74, 88, 103, 91, 89, 92, 76, 87, 110, + 105, 78, 112, 99, 126, 126, 126, 126, 66, 78, 71, 72, 4, 8, 70, 75, + 89, 119, 75, 43, 41, 126, 9, 2, 5, 3, 2, 67, 84, 74, 65, 11, + 6, 2, 69, 70, 8, 71, 5, 2, 22, 38, 31, 20, 16, 19, 12, 17, + 25, 66, 25, 21, 29, 89, 18, 35, 32, 62, 62, 48, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 53, 62, 62, 62, 62, 62, 62, 62, 56, 62, + 62, 62, 27, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 53, 45, + 38, 22, 75, 72, 77, 28, 32, 28, 33, 18, 21, 18, 37, 9, 66, 7, + 73, 67, 116, 112, 71, 2, 10, 66, 77, 80, 84, 87, 126, 101, 24, 10, + 2, 75, 77, 91, 107, 111, 122, 76, 19, 11, 6, 5, 72, 69, 69, 74, + 86, 66, 29, 31, 32, 11, 8, 67, 73, 89, 11, 59, 55, 55, 44, 26, + 2, 73, 70, 78, 62, 126, 124, 110, 126, 124, 105, 121, 117, 102, 117, 116, + 122, 95, 100, 95, 111, 114, 89, 80, 82, 85, 81, 72, 64, 67, 7, 69, + 69, 69, 69, 67, 77, 64, 2, 67, 64, 6, 65, 66, 1, 12, 66, 71, + 75, 70, 72, 3, 26, 16, 28, 26, 22, 22, 15, 22, 22, 4, 13, 23, + 66, 13, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 
54, 62, 62, 62, 62, 62, 62, 62, 62, 62, 49, 37, 26, 8, 65, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 43, 33, 19, 15, 14, + 18, 41, 41, 42, 43, 35, 39, 29, 21, 24, 13, 70, 9, 71, 83, 31, + 14, 9, 85, 81, 77, 81, 80, 73, 74, 83, 71, 67, 2, 66, 66, 4, + 4, 62, 62, 62, 62, 62, 60, 53, 36, 6, 71, 39, 27, 21, 11, 6, + 0, 65, 67, 82, 81, 76, 72, 78, 72, 68, 70, 76, 66, 1, 6, 2, + 3, 9, 5, 62, 62, 62, 62, 62, 60, 53, 36, 6, 75, 65, 4, 67, + 67, 104, 106}, + + { + + 62, 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 37, 61, 62, 55, 22, + 116, 65, 70, 78, 11, 13, 69, 9, 28, 40, 61, 58, 25, 70, 124, 100, + 75, 70, 78, 11, 69, 89, 11, 20, 68, 81, 95, 4, 75, 86, 99, 7, + 73, 84, 4, 80, 85, 94, 65, 76, 70, 85, 71, 2, 22, 0, 0, 0, + 82, 86, 97, 71, 22, 1, 48, 12, 80, 124, 89, 94, 79, 95, 100, 95, + 117, 97, 108, 100, 124, 80, 88, 93, 91, 24, 65, 83, 124, 72, 103, 90, + 125, 8, 7, 8, 2, 11, 68, 73, 87, 102, 90, 88, 91, 75, 86, 108, + 103, 77, 110, 97, 122, 122, 123, 124, 65, 77, 70, 71, 4, 9, 69, 74, + 88, 116, 74, 41, 40, 124, 9, 3, 5, 4, 3, 66, 82, 73, 64, 11, + 6, 2, 68, 69, 7, 70, 5, 2, 22, 37, 31, 20, 16, 19, 12, 17, + 24, 65, 25, 21, 29, 89, 18, 35, 32, 62, 62, 47, 62, 62, 62, 61, + 62, 62, 62, 62, 62, 62, 52, 62, 62, 62, 62, 62, 62, 62, 54, 62, + 60, 62, 26, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 52, 44, + 37, 21, 75, 72, 77, 28, 31, 27, 32, 17, 20, 17, 36, 8, 66, 6, + 73, 67, 115, 110, 70, 3, 10, 65, 76, 79, 83, 86, 124, 99, 25, 11, + 3, 74, 76, 89, 105, 109, 120, 75, 20, 12, 7, 6, 71, 68, 68, 73, + 85, 66, 30, 31, 32, 11, 9, 66, 73, 88, 11, 59, 55, 54, 43, 26, + 3, 72, 69, 77, 62, 124, 122, 108, 124, 122, 103, 119, 115, 100, 115, 114, + 119, 94, 99, 94, 109, 112, 88, 79, 81, 84, 80, 71, 64, 67, 7, 69, + 69, 69, 68, 66, 76, 0, 2, 66, 0, 6, 64, 65, 1, 12, 65, 70, + 74, 69, 71, 3, 25, 16, 27, 26, 22, 22, 15, 22, 22, 4, 13, 22, + 66, 12, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 52, 62, 62, 62, 62, 62, 62, 62, 61, 62, 48, 36, 25, 8, 65, 62, + 62, 62, 62, 
62, 62, 62, 62, 62, 62, 62, 62, 42, 32, 18, 15, 14, + 17, 40, 40, 41, 41, 34, 38, 28, 20, 23, 12, 70, 8, 71, 83, 30, + 13, 8, 84, 80, 76, 80, 78, 71, 73, 82, 70, 66, 3, 65, 65, 4, + 4, 62, 62, 62, 62, 60, 56, 49, 32, 4, 70, 39, 28, 22, 12, 7, + 1, 64, 66, 81, 80, 75, 71, 77, 71, 67, 69, 75, 65, 2, 6, 3, + 4, 9, 5, 62, 62, 62, 62, 60, 56, 49, 32, 4, 75, 65, 4, 66, + 66, 102, 103}, + + { + + 62, 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 36, 59, 61, 55, 22, + 114, 65, 70, 77, 11, 12, 69, 8, 26, 39, 58, 54, 22, 72, 121, 99, + 75, 70, 77, 11, 69, 88, 11, 19, 68, 81, 94, 4, 75, 86, 99, 7, + 73, 84, 4, 80, 85, 94, 65, 76, 70, 85, 71, 2, 22, 0, 0, 0, + 81, 86, 97, 71, 21, 1, 47, 12, 80, 122, 88, 93, 77, 93, 99, 94, + 115, 96, 107, 99, 122, 80, 88, 93, 91, 24, 65, 82, 122, 72, 102, 89, + 123, 8, 7, 8, 1, 11, 68, 73, 86, 101, 89, 87, 90, 75, 85, 107, + 102, 76, 109, 96, 117, 118, 120, 121, 65, 77, 70, 71, 4, 9, 69, 74, + 88, 114, 74, 39, 38, 121, 9, 3, 5, 4, 3, 66, 80, 72, 64, 11, + 6, 2, 67, 68, 6, 70, 5, 2, 21, 36, 30, 20, 15, 19, 12, 17, + 23, 65, 24, 20, 28, 89, 18, 34, 31, 62, 62, 46, 60, 62, 62, 59, + 62, 62, 62, 62, 62, 62, 50, 62, 62, 62, 62, 62, 62, 62, 52, 62, + 58, 62, 24, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 59, 50, 42, + 35, 19, 75, 72, 78, 27, 30, 26, 31, 16, 19, 16, 34, 7, 66, 5, + 74, 68, 114, 109, 69, 3, 10, 65, 75, 78, 82, 85, 122, 98, 25, 11, + 3, 73, 75, 88, 103, 107, 118, 74, 21, 13, 8, 7, 70, 68, 68, 73, + 84, 66, 31, 31, 31, 11, 9, 66, 73, 88, 11, 59, 54, 53, 42, 26, + 3, 72, 69, 77, 62, 123, 121, 107, 122, 120, 102, 117, 113, 99, 113, 112, + 117, 93, 98, 94, 108, 110, 88, 79, 81, 83, 80, 71, 64, 67, 6, 69, + 69, 69, 68, 66, 75, 0, 2, 66, 0, 6, 64, 65, 1, 11, 65, 70, + 74, 69, 70, 2, 24, 16, 26, 25, 21, 21, 15, 21, 21, 4, 13, 21, + 66, 11, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 50, 62, 62, 62, 62, 62, 62, 62, 59, 59, 46, 34, 24, 7, 66, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 40, 30, 16, 14, 13, + 15, 39, 39, 39, 39, 32, 
36, 26, 19, 21, 11, 71, 7, 72, 84, 28, + 12, 7, 84, 80, 75, 80, 77, 70, 73, 81, 69, 65, 3, 65, 64, 4, + 4, 62, 62, 62, 62, 57, 52, 45, 28, 1, 70, 39, 28, 22, 12, 8, + 1, 64, 66, 81, 80, 75, 71, 77, 70, 66, 69, 75, 65, 2, 6, 3, + 5, 9, 5, 62, 62, 62, 62, 57, 52, 45, 28, 1, 75, 65, 4, 66, + 66, 101, 101}, + + { + + 62, 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 34, 57, 60, 55, 22, + 112, 65, 69, 76, 11, 12, 69, 8, 25, 38, 56, 51, 20, 73, 118, 98, + 75, 69, 76, 11, 70, 87, 11, 19, 68, 81, 94, 4, 75, 86, 99, 7, + 73, 83, 4, 80, 84, 94, 65, 76, 70, 85, 71, 2, 22, 0, 0, 0, + 81, 86, 97, 70, 20, 1, 46, 11, 80, 119, 87, 92, 76, 91, 97, 92, + 113, 94, 106, 98, 120, 80, 88, 92, 91, 24, 65, 81, 120, 72, 101, 89, + 121, 8, 6, 7, 1, 11, 68, 72, 86, 100, 88, 87, 89, 74, 84, 105, + 100, 76, 108, 95, 112, 113, 117, 118, 65, 77, 70, 70, 4, 9, 68, 73, + 87, 112, 74, 37, 36, 118, 9, 3, 5, 4, 3, 65, 79, 71, 64, 11, + 6, 2, 67, 67, 5, 70, 5, 1, 21, 35, 30, 20, 15, 19, 12, 17, + 22, 65, 23, 19, 28, 89, 18, 34, 31, 62, 62, 45, 58, 62, 62, 57, + 62, 62, 62, 62, 62, 61, 48, 62, 62, 62, 62, 62, 62, 60, 50, 62, + 56, 62, 22, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 48, 40, + 34, 17, 75, 72, 78, 26, 29, 25, 30, 15, 18, 15, 32, 6, 67, 4, + 75, 68, 114, 107, 68, 4, 10, 65, 74, 78, 82, 85, 120, 97, 25, 11, + 4, 72, 74, 87, 102, 106, 116, 73, 21, 13, 8, 7, 69, 67, 68, 73, + 84, 66, 31, 31, 30, 11, 9, 66, 73, 87, 11, 58, 54, 52, 41, 26, + 3, 72, 69, 77, 62, 122, 119, 106, 121, 119, 101, 115, 111, 98, 112, 110, + 115, 93, 97, 93, 107, 108, 87, 79, 81, 83, 79, 71, 64, 67, 6, 69, + 69, 70, 67, 65, 74, 0, 2, 65, 0, 6, 64, 65, 1, 11, 65, 70, + 74, 69, 70, 1, 23, 16, 25, 24, 20, 21, 15, 20, 20, 4, 13, 20, + 66, 10, 62, 62, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 48, 62, 62, 62, 62, 62, 62, 62, 57, 57, 44, 32, 22, 6, 67, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 59, 60, 38, 28, 15, 13, 12, + 14, 37, 37, 37, 37, 31, 34, 24, 18, 20, 10, 72, 6, 73, 85, 27, + 11, 6, 84, 79, 75, 79, 76, 69, 73, 81, 
69, 65, 3, 64, 0, 4, + 4, 62, 62, 62, 59, 54, 48, 41, 24, 65, 70, 39, 28, 22, 12, 8, + 2, 64, 66, 80, 80, 75, 70, 76, 69, 65, 69, 74, 65, 2, 6, 3, + 5, 9, 5, 62, 62, 62, 59, 54, 48, 41, 24, 65, 75, 65, 4, 65, + 65, 99, 99}, + + { + + 62, 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 33, 55, 59, 55, 21, + 110, 65, 69, 75, 10, 11, 69, 7, 23, 37, 53, 47, 17, 75, 115, 97, + 75, 69, 75, 10, 70, 86, 11, 18, 68, 80, 93, 4, 75, 86, 99, 7, + 73, 83, 4, 80, 84, 93, 65, 76, 70, 85, 70, 2, 22, 0, 0, 0, + 80, 87, 97, 70, 19, 1, 45, 11, 80, 117, 86, 91, 74, 89, 96, 91, + 112, 93, 104, 97, 118, 80, 87, 92, 91, 24, 65, 80, 118, 72, 101, 88, + 119, 8, 6, 7, 0, 11, 68, 72, 85, 99, 87, 86, 88, 74, 84, 104, + 99, 75, 107, 94, 107, 109, 114, 115, 65, 76, 70, 70, 4, 9, 68, 73, + 87, 110, 74, 35, 34, 116, 9, 4, 5, 4, 3, 65, 77, 70, 0, 10, + 6, 2, 66, 67, 4, 70, 5, 1, 20, 34, 29, 19, 14, 19, 12, 17, + 21, 65, 22, 18, 27, 89, 17, 33, 30, 62, 62, 44, 56, 62, 62, 55, + 62, 62, 62, 62, 62, 59, 46, 59, 62, 62, 62, 62, 62, 57, 48, 62, + 54, 62, 21, 62, 62, 62, 62, 62, 62, 62, 62, 62, 60, 55, 46, 38, + 32, 15, 75, 72, 79, 25, 28, 24, 28, 14, 16, 14, 31, 5, 67, 3, + 75, 69, 113, 106, 67, 4, 10, 64, 74, 77, 81, 84, 118, 95, 25, 12, + 4, 72, 73, 86, 100, 104, 115, 73, 22, 14, 9, 8, 68, 67, 68, 72, + 83, 66, 32, 31, 30, 10, 9, 66, 73, 87, 11, 58, 53, 51, 40, 26, + 3, 71, 69, 77, 62, 120, 118, 105, 119, 117, 100, 114, 110, 97, 110, 109, + 113, 92, 96, 93, 106, 107, 87, 79, 81, 82, 79, 71, 65, 67, 5, 69, + 69, 70, 67, 65, 73, 0, 2, 65, 0, 6, 64, 65, 1, 10, 65, 70, + 74, 69, 69, 0, 22, 16, 24, 24, 19, 20, 15, 19, 19, 4, 13, 19, + 66, 9, 62, 62, 60, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 46, 62, 62, 62, 62, 62, 62, 62, 54, 54, 42, 30, 21, 5, 67, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 57, 36, 26, 13, 12, 12, + 12, 36, 36, 36, 35, 29, 32, 23, 17, 18, 9, 73, 4, 74, 85, 25, + 9, 4, 83, 79, 74, 79, 75, 68, 73, 80, 68, 64, 3, 64, 1, 4, + 4, 62, 62, 62, 56, 50, 44, 36, 20, 68, 69, 39, 28, 22, 12, 9, + 
2, 64, 66, 80, 80, 75, 70, 76, 69, 64, 69, 74, 64, 3, 6, 3, + 6, 9, 5, 62, 62, 62, 56, 50, 44, 36, 20, 68, 75, 65, 4, 65, + 65, 98, 97}, + + { + + 62, 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 32, 53, 58, 55, 21, + 108, 65, 69, 74, 10, 11, 69, 6, 21, 36, 51, 44, 15, 77, 112, 96, + 74, 69, 74, 10, 70, 85, 11, 18, 68, 80, 92, 4, 75, 86, 99, 7, + 73, 83, 4, 80, 83, 93, 65, 76, 70, 85, 70, 2, 22, 0, 0, 0, + 80, 87, 97, 69, 18, 1, 44, 10, 80, 114, 85, 90, 72, 87, 94, 89, + 110, 91, 103, 96, 115, 80, 87, 91, 90, 24, 65, 79, 116, 72, 100, 88, + 117, 8, 5, 6, 0, 11, 68, 71, 85, 98, 86, 86, 87, 73, 83, 102, + 97, 74, 105, 93, 102, 105, 111, 112, 64, 76, 69, 69, 4, 9, 67, 73, + 86, 108, 74, 33, 32, 113, 9, 4, 5, 4, 3, 64, 76, 69, 0, 10, + 6, 2, 66, 66, 3, 69, 5, 0, 20, 33, 29, 19, 14, 19, 12, 17, + 20, 64, 21, 18, 27, 89, 17, 32, 29, 62, 62, 43, 55, 62, 62, 53, + 62, 62, 62, 62, 61, 57, 44, 57, 62, 60, 62, 62, 62, 55, 46, 62, + 52, 62, 19, 62, 62, 62, 62, 62, 62, 62, 62, 61, 58, 53, 44, 37, + 30, 13, 75, 72, 79, 24, 27, 23, 27, 13, 15, 13, 29, 4, 68, 2, + 76, 70, 112, 104, 66, 5, 10, 64, 73, 77, 81, 83, 116, 94, 25, 12, + 5, 71, 72, 85, 99, 103, 113, 72, 23, 15, 10, 8, 67, 66, 67, 72, + 83, 66, 32, 31, 29, 10, 9, 66, 73, 86, 11, 57, 52, 50, 39, 26, + 3, 71, 69, 76, 62, 119, 116, 103, 117, 116, 99, 112, 108, 96, 108, 107, + 111, 91, 95, 92, 105, 105, 87, 79, 80, 82, 78, 71, 65, 67, 5, 69, + 69, 71, 66, 65, 72, 0, 2, 65, 0, 6, 64, 65, 1, 10, 65, 70, + 74, 69, 69, 64, 21, 16, 23, 23, 19, 19, 15, 19, 18, 4, 13, 18, + 66, 8, 62, 62, 59, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 44, 62, 62, 62, 62, 62, 62, 61, 52, 52, 40, 29, 19, 5, 68, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 61, 55, 54, 34, 24, 12, 12, 11, + 10, 35, 34, 34, 33, 27, 30, 21, 16, 17, 8, 73, 3, 75, 86, 24, + 8, 3, 83, 79, 73, 78, 74, 67, 72, 79, 68, 64, 3, 0, 2, 4, + 4, 62, 62, 59, 53, 47, 40, 32, 16, 71, 69, 39, 28, 22, 12, 9, + 2, 0, 65, 79, 80, 75, 69, 76, 68, 0, 69, 74, 64, 3, 6, 4, + 6, 9, 5, 62, 62, 59, 53, 47, 
40, 32, 16, 71, 75, 65, 4, 65, + 65, 96, 95}, + + { + + 62, 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 30, 51, 57, 55, 21, + 107, 65, 68, 74, 10, 10, 68, 6, 20, 34, 48, 40, 12, 78, 110, 95, + 74, 68, 74, 10, 71, 85, 11, 17, 68, 80, 92, 4, 75, 85, 98, 7, + 72, 82, 4, 79, 83, 93, 65, 76, 70, 85, 70, 2, 22, 0, 0, 0, + 79, 87, 97, 69, 18, 0, 44, 10, 80, 112, 84, 89, 71, 84, 93, 88, + 108, 90, 102, 95, 113, 80, 87, 91, 90, 24, 65, 78, 113, 72, 99, 87, + 115, 7, 5, 6, 64, 12, 68, 71, 84, 98, 86, 85, 86, 73, 82, 101, + 96, 74, 104, 92, 97, 100, 108, 109, 64, 76, 69, 69, 4, 9, 67, 72, + 86, 106, 73, 31, 30, 110, 9, 4, 5, 4, 4, 64, 74, 68, 0, 10, + 6, 2, 65, 65, 2, 69, 5, 0, 19, 32, 28, 19, 13, 19, 12, 17, + 18, 64, 20, 17, 26, 89, 17, 32, 29, 62, 62, 42, 53, 62, 62, 51, + 62, 62, 62, 62, 57, 55, 43, 55, 62, 58, 62, 62, 62, 52, 44, 62, + 50, 62, 17, 62, 62, 62, 62, 62, 62, 62, 62, 59, 56, 50, 42, 35, + 29, 12, 75, 72, 80, 23, 26, 22, 26, 12, 14, 12, 27, 3, 68, 1, + 77, 70, 112, 103, 65, 5, 10, 64, 72, 76, 80, 83, 114, 93, 26, 12, + 5, 70, 71, 84, 97, 101, 111, 71, 23, 15, 10, 9, 66, 66, 67, 72, + 82, 66, 33, 31, 28, 10, 9, 66, 73, 86, 10, 57, 52, 49, 38, 25, + 3, 71, 69, 76, 62, 118, 115, 102, 116, 114, 98, 110, 106, 95, 107, 105, + 109, 91, 94, 92, 104, 103, 86, 79, 80, 81, 78, 71, 65, 67, 4, 69, + 69, 71, 66, 64, 71, 0, 2, 64, 1, 6, 0, 64, 1, 9, 65, 70, + 74, 69, 68, 65, 20, 16, 22, 22, 18, 19, 15, 18, 18, 4, 12, 16, + 67, 7, 62, 62, 58, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 42, 62, 62, 62, 62, 62, 62, 58, 50, 49, 38, 27, 18, 4, 69, 62, + 62, 62, 62, 62, 62, 62, 62, 61, 58, 52, 51, 32, 23, 10, 11, 10, + 9, 33, 33, 32, 31, 26, 28, 19, 15, 15, 7, 74, 2, 76, 87, 22, + 7, 2, 83, 78, 73, 78, 73, 66, 72, 79, 67, 0, 3, 0, 3, 4, + 4, 62, 62, 57, 50, 44, 36, 28, 12, 74, 69, 39, 28, 22, 12, 10, + 3, 0, 65, 79, 79, 74, 69, 75, 67, 1, 68, 73, 64, 3, 6, 4, + 7, 9, 5, 62, 62, 57, 50, 44, 36, 28, 12, 74, 75, 65, 4, 64, + 64, 95, 92}, + + { + + 62, 9, 75, 62, 9, 75, 114, 93, 13, 10, 
9, 29, 49, 56, 55, 21, + 105, 65, 68, 73, 9, 10, 68, 5, 18, 33, 46, 37, 10, 80, 107, 94, + 74, 68, 73, 9, 71, 84, 11, 17, 68, 79, 91, 4, 75, 85, 98, 7, + 72, 82, 4, 79, 82, 92, 65, 76, 70, 85, 69, 2, 22, 0, 0, 0, + 79, 87, 97, 68, 17, 0, 43, 9, 80, 109, 83, 88, 69, 82, 91, 86, + 107, 88, 100, 94, 111, 80, 86, 90, 90, 24, 65, 77, 111, 72, 98, 87, + 113, 7, 4, 5, 64, 12, 68, 70, 84, 97, 85, 85, 85, 72, 81, 99, + 94, 73, 103, 91, 92, 96, 105, 106, 64, 75, 69, 68, 4, 9, 66, 72, + 85, 104, 73, 29, 28, 107, 9, 5, 5, 4, 4, 0, 73, 67, 1, 9, + 6, 2, 65, 65, 1, 69, 5, 64, 19, 31, 28, 18, 13, 19, 12, 17, + 17, 64, 19, 16, 26, 89, 17, 31, 28, 60, 62, 41, 51, 62, 62, 49, + 62, 61, 62, 62, 54, 53, 41, 52, 62, 55, 62, 62, 62, 49, 42, 62, + 48, 62, 16, 62, 62, 62, 62, 62, 62, 62, 62, 57, 53, 48, 40, 33, + 27, 10, 75, 72, 80, 22, 25, 21, 24, 11, 13, 11, 26, 2, 69, 0, + 77, 71, 111, 101, 64, 6, 10, 0, 72, 76, 80, 82, 112, 91, 26, 13, + 6, 70, 70, 83, 96, 100, 109, 71, 24, 16, 11, 9, 65, 65, 67, 71, + 82, 66, 33, 31, 28, 9, 9, 66, 73, 85, 10, 56, 51, 48, 37, 25, + 3, 70, 69, 76, 62, 116, 113, 101, 114, 113, 97, 109, 105, 94, 105, 104, + 107, 90, 93, 91, 103, 101, 86, 79, 80, 81, 77, 71, 66, 67, 4, 69, + 69, 72, 65, 64, 70, 0, 2, 64, 1, 6, 0, 64, 1, 9, 65, 70, + 74, 69, 68, 66, 19, 16, 21, 22, 17, 18, 15, 17, 17, 4, 12, 15, + 67, 6, 61, 62, 57, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 40, 62, 62, 62, 62, 62, 62, 56, 48, 47, 36, 25, 16, 3, 69, 62, + 62, 62, 62, 62, 62, 62, 62, 59, 56, 50, 48, 30, 21, 9, 10, 10, + 7, 32, 31, 31, 29, 24, 26, 18, 14, 14, 6, 75, 0, 77, 87, 21, + 5, 0, 82, 78, 72, 77, 72, 65, 72, 78, 67, 0, 3, 1, 4, 4, + 4, 62, 62, 54, 47, 40, 32, 24, 8, 77, 68, 39, 28, 22, 12, 10, + 3, 0, 65, 78, 79, 74, 68, 75, 66, 2, 68, 73, 0, 4, 6, 4, + 7, 9, 5, 62, 62, 54, 47, 40, 32, 24, 8, 77, 75, 65, 4, 64, + 64, 93, 90}, + + { + + 62, 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 27, 46, 55, 55, 20, + 103, 66, 68, 72, 9, 9, 68, 4, 16, 32, 43, 33, 7, 82, 104, 93, + 74, 68, 72, 9, 72, 
83, 11, 16, 68, 79, 91, 3, 76, 85, 98, 7, + 72, 82, 4, 79, 82, 92, 65, 76, 70, 85, 69, 2, 22, 0, 0, 0, + 78, 88, 97, 68, 16, 0, 42, 9, 81, 107, 82, 87, 68, 80, 90, 85, + 105, 87, 99, 93, 109, 80, 86, 90, 90, 24, 65, 76, 109, 72, 98, 86, + 111, 7, 4, 5, 65, 12, 68, 70, 83, 96, 84, 84, 85, 72, 81, 98, + 93, 73, 102, 90, 88, 92, 102, 104, 64, 75, 69, 68, 3, 9, 66, 72, + 85, 102, 73, 27, 26, 105, 9, 5, 5, 4, 4, 0, 71, 67, 1, 9, + 5, 2, 64, 64, 64, 69, 5, 64, 18, 29, 27, 18, 12, 19, 12, 16, + 16, 64, 18, 15, 25, 89, 16, 30, 27, 58, 62, 39, 49, 62, 62, 46, + 62, 59, 62, 62, 50, 51, 39, 50, 62, 53, 62, 62, 62, 46, 40, 62, + 46, 62, 14, 62, 62, 62, 62, 62, 62, 62, 60, 55, 51, 46, 38, 31, + 25, 8, 75, 73, 81, 21, 23, 20, 23, 10, 11, 9, 24, 1, 69, 64, + 78, 72, 111, 100, 0, 6, 10, 0, 71, 75, 79, 82, 110, 90, 26, 13, + 6, 69, 69, 82, 94, 98, 108, 70, 24, 16, 11, 10, 64, 65, 67, 71, + 81, 67, 34, 31, 27, 9, 9, 66, 73, 85, 10, 56, 50, 47, 36, 25, + 3, 70, 69, 76, 62, 115, 112, 100, 113, 111, 96, 107, 103, 93, 104, 102, + 105, 90, 93, 91, 102, 100, 86, 79, 80, 80, 77, 71, 66, 67, 3, 69, + 69, 72, 65, 64, 69, 0, 1, 64, 1, 5, 0, 64, 1, 8, 65, 70, + 74, 69, 67, 67, 18, 16, 19, 21, 16, 17, 14, 16, 16, 4, 12, 14, + 67, 4, 60, 60, 56, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 60, + 38, 62, 62, 62, 62, 62, 62, 53, 45, 44, 34, 23, 15, 2, 70, 62, + 62, 62, 62, 62, 62, 62, 62, 56, 53, 47, 45, 28, 19, 7, 9, 9, + 5, 30, 30, 29, 27, 22, 24, 16, 12, 12, 4, 76, 64, 78, 88, 19, + 4, 64, 82, 78, 72, 77, 71, 64, 72, 78, 66, 1, 3, 1, 4, 4, + 3, 62, 60, 51, 44, 37, 28, 19, 3, 80, 68, 39, 28, 22, 12, 11, + 3, 0, 65, 78, 79, 74, 68, 75, 66, 2, 68, 73, 0, 4, 6, 4, + 8, 9, 4, 62, 60, 51, 44, 37, 28, 19, 3, 80, 75, 66, 3, 64, + 64, 92, 88}, + + { + + 62, 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 26, 44, 54, 56, 20, 101, + 66, 67, 71, 9, 8, 68, 4, 15, 31, 41, 29, 4, 83, 101, 92, 73, 67, + 71, 9, 72, 82, 11, 16, 67, 79, 90, 3, 76, 85, 98, 7, 72, 81, 4, + 79, 82, 92, 65, 76, 70, 84, 69, 2, 22, 0, 0, 0, 77, 88, 
97, 68, + 15, 0, 41, 9, 81, 105, 80, 86, 66, 78, 88, 84, 103, 85, 98, 91, 106, + 80, 86, 90, 89, 24, 65, 75, 107, 71, 97, 85, 109, 7, 4, 5, 65, 12, + 68, 70, 82, 95, 83, 83, 84, 71, 80, 97, 91, 72, 100, 89, 83, 87, 98, + 101, 0, 75, 68, 67, 3, 9, 66, 71, 84, 99, 73, 25, 25, 102, 9, 5, + 5, 4, 4, 1, 69, 66, 1, 9, 5, 2, 0, 0, 65, 68, 5, 64, 17, + 28, 26, 18, 11, 19, 12, 16, 15, 0, 17, 15, 24, 89, 16, 30, 27, 56, + 62, 38, 48, 62, 62, 44, 60, 57, 62, 62, 47, 49, 37, 48, 62, 51, 62, + 62, 62, 44, 38, 62, 44, 62, 12, 62, 62, 62, 62, 62, 62, 60, 58, 53, + 49, 44, 37, 30, 24, 6, 75, 73, 81, 21, 22, 19, 22, 9, 10, 8, 22, + 0, 69, 65, 79, 72, 110, 99, 1, 6, 10, 0, 70, 74, 78, 81, 107, 89, + 26, 13, 6, 68, 68, 81, 92, 96, 106, 69, 25, 17, 12, 11, 0, 65, 66, + 71, 80, 67, 35, 31, 26, 9, 10, 65, 73, 84, 10, 56, 50, 46, 35, 25, + 3, 70, 69, 75, 62, 114, 111, 98, 111, 109, 95, 105, 101, 92, 102, 100, 103, + 89, 92, 90, 101, 98, 85, 78, 79, 79, 76, 71, 66, 67, 2, 69, 69, 72, + 65, 0, 68, 1, 1, 0, 1, 5, 0, 64, 1, 7, 65, 69, 73, 69, 66, + 67, 17, 16, 18, 20, 16, 17, 14, 16, 15, 4, 12, 13, 67, 3, 59, 59, + 56, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 36, 62, 62, 62, 62, + 62, 62, 50, 43, 42, 33, 22, 14, 2, 71, 62, 62, 62, 62, 62, 62, 62, + 62, 54, 51, 45, 43, 26, 17, 5, 9, 8, 4, 29, 29, 27, 25, 21, 23, + 14, 11, 10, 3, 76, 65, 78, 89, 17, 3, 65, 82, 77, 71, 77, 70, 1, + 71, 77, 65, 2, 3, 2, 5, 4, 3, 62, 58, 49, 41, 34, 24, 15, 64, + 83, 68, 39, 28, 23, 13, 12, 4, 1, 64, 78, 79, 74, 68, 74, 65, 3, + 68, 72, 0, 4, 6, 5, 9, 9, 4, 62, 58, 49, 41, 34, 24, 15, 64, + 83, 75, 66, 3, 0, 0, 91, 86}, + + { + + 62, 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 25, 42, 53, 56, 20, 99, + 66, 67, 70, 8, 8, 68, 3, 13, 30, 38, 26, 2, 85, 98, 91, 73, 67, + 70, 8, 72, 81, 11, 15, 67, 78, 89, 3, 76, 85, 98, 7, 72, 81, 4, + 79, 81, 91, 65, 76, 70, 84, 68, 2, 22, 0, 0, 0, 77, 88, 97, 67, + 14, 0, 40, 8, 81, 102, 79, 85, 64, 76, 87, 82, 102, 84, 96, 90, 104, + 80, 85, 89, 89, 24, 65, 74, 105, 71, 96, 85, 107, 
7, 3, 4, 66, 12, + 68, 69, 82, 94, 82, 83, 83, 71, 79, 95, 90, 71, 99, 88, 78, 83, 95, + 98, 0, 74, 68, 67, 3, 9, 65, 71, 84, 97, 73, 23, 23, 99, 9, 6, + 5, 4, 4, 1, 68, 65, 2, 8, 5, 2, 0, 0, 66, 68, 5, 65, 17, + 27, 26, 17, 11, 19, 12, 16, 14, 0, 16, 14, 24, 89, 16, 29, 26, 54, + 62, 37, 46, 62, 62, 42, 57, 55, 62, 62, 43, 47, 35, 45, 61, 48, 62, + 62, 62, 41, 36, 58, 42, 62, 11, 62, 62, 62, 62, 62, 60, 58, 56, 51, + 46, 42, 35, 28, 22, 4, 75, 73, 82, 20, 21, 18, 20, 8, 9, 7, 21, + 64, 70, 66, 79, 73, 109, 97, 2, 7, 10, 1, 70, 74, 78, 80, 105, 87, + 26, 14, 7, 68, 67, 80, 91, 95, 104, 69, 26, 18, 13, 11, 1, 64, 66, + 70, 80, 67, 35, 31, 26, 8, 10, 65, 73, 84, 10, 55, 49, 45, 34, 25, + 3, 69, 69, 75, 62, 112, 109, 97, 109, 108, 94, 104, 100, 91, 100, 99, 101, + 88, 91, 90, 100, 96, 85, 78, 79, 79, 76, 71, 67, 67, 2, 69, 69, 73, + 64, 0, 67, 1, 1, 0, 1, 5, 0, 64, 1, 7, 65, 69, 73, 69, 66, + 68, 16, 16, 17, 20, 15, 16, 14, 15, 14, 4, 12, 12, 67, 2, 58, 58, + 55, 59, 60, 62, 62, 62, 62, 62, 62, 62, 62, 55, 34, 62, 62, 62, 62, + 62, 62, 48, 41, 39, 31, 20, 12, 1, 71, 62, 62, 62, 62, 62, 62, 62, + 62, 52, 48, 43, 40, 24, 15, 4, 8, 8, 2, 28, 27, 26, 23, 19, 21, + 13, 10, 9, 2, 77, 67, 79, 89, 16, 1, 67, 81, 77, 70, 76, 69, 2, + 71, 76, 65, 2, 3, 2, 6, 4, 3, 62, 56, 46, 38, 30, 20, 11, 68, + 86, 67, 39, 28, 23, 13, 12, 4, 1, 64, 77, 79, 74, 67, 74, 64, 4, + 68, 72, 1, 5, 6, 5, 9, 9, 4, 62, 56, 46, 38, 30, 20, 11, 68, + 86, 75, 66, 3, 0, 0, 89, 84}, + + { + + 62, 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 23, 40, 52, 56, 20, 98, 66, + 66, 70, 8, 7, 67, 3, 12, 28, 36, 22, 64, 86, 96, 90, 73, 66, 70, 8, + 73, 81, 11, 15, 67, 78, 89, 3, 76, 84, 97, 7, 71, 80, 4, 78, 81, 91, + 65, 76, 70, 84, 68, 2, 22, 0, 0, 0, 76, 88, 97, 67, 14, 64, 40, 8, + 81, 100, 78, 84, 0, 73, 85, 81, 100, 82, 95, 89, 102, 80, 85, 89, 89, 24, + 65, 73, 102, 71, 95, 84, 105, 6, 3, 4, 66, 13, 68, 69, 81, 94, 82, 82, + 82, 70, 78, 94, 88, 71, 98, 87, 73, 78, 92, 95, 0, 74, 68, 66, 3, 9, + 65, 70, 83, 95, 72, 
21, 21, 96, 9, 6, 5, 4, 5, 2, 66, 64, 2, 8, + 5, 2, 1, 1, 67, 68, 5, 65, 16, 26, 25, 17, 10, 19, 12, 16, 12, 0, + 15, 13, 23, 89, 16, 29, 26, 52, 62, 36, 44, 61, 62, 40, 55, 53, 62, 62, + 40, 45, 34, 43, 57, 46, 62, 62, 62, 38, 34, 55, 40, 62, 9, 62, 62, 62, + 62, 62, 58, 55, 54, 49, 44, 39, 33, 26, 21, 3, 75, 73, 82, 19, 20, 17, + 19, 7, 8, 6, 19, 65, 70, 67, 80, 73, 109, 96, 3, 7, 10, 1, 69, 73, + 77, 80, 103, 86, 27, 14, 7, 67, 66, 79, 89, 93, 102, 68, 26, 18, 13, 12, + 2, 64, 66, 70, 79, 67, 36, 31, 25, 8, 10, 65, 73, 83, 9, 55, 49, 44, + 33, 24, 3, 69, 69, 75, 62, 111, 108, 96, 108, 106, 93, 102, 98, 90, 99, 97, + 99, 88, 90, 89, 99, 94, 84, 78, 79, 78, 75, 71, 67, 67, 1, 69, 69, 73, + 64, 1, 66, 1, 1, 1, 2, 5, 1, 0, 1, 6, 65, 69, 73, 69, 65, 69, + 15, 16, 16, 19, 14, 16, 14, 14, 14, 4, 11, 10, 68, 1, 56, 57, 54, 58, + 58, 62, 62, 62, 62, 62, 62, 62, 62, 52, 32, 62, 62, 62, 62, 62, 62, 45, + 39, 37, 29, 18, 11, 0, 72, 62, 62, 62, 62, 62, 62, 60, 59, 49, 46, 40, + 37, 22, 14, 2, 7, 7, 1, 26, 26, 24, 21, 18, 19, 11, 9, 7, 1, 78, + 68, 80, 90, 14, 0, 68, 81, 76, 70, 76, 68, 3, 71, 76, 64, 3, 3, 3, + 7, 4, 3, 62, 54, 44, 35, 27, 16, 7, 72, 89, 67, 39, 28, 23, 13, 13, + 5, 1, 64, 77, 78, 73, 67, 73, 0, 5, 67, 71, 1, 5, 6, 5, 10, 9, + 4, 62, 54, 44, 35, 27, 16, 7, 72, 89, 75, 66, 3, 1, 1, 88, 81}, + + { + + 62, 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 22, 38, 51, 56, 19, 96, 66, + 66, 69, 8, 7, 67, 2, 10, 27, 33, 19, 66, 88, 93, 89, 73, 66, 69, 8, + 73, 80, 11, 14, 67, 78, 88, 3, 76, 84, 97, 7, 71, 80, 4, 78, 80, 91, + 65, 76, 70, 84, 68, 2, 22, 0, 0, 0, 76, 89, 97, 66, 13, 64, 39, 7, + 81, 97, 77, 83, 2, 71, 84, 79, 98, 81, 94, 88, 100, 80, 85, 88, 89, 24, + 65, 72, 100, 71, 95, 84, 103, 6, 2, 3, 67, 13, 68, 68, 81, 93, 81, 82, + 81, 70, 78, 92, 87, 70, 97, 86, 68, 74, 89, 92, 0, 74, 68, 66, 3, 9, + 64, 70, 83, 93, 72, 19, 19, 94, 9, 6, 5, 4, 5, 2, 65, 0, 2, 8, + 5, 2, 1, 2, 68, 68, 5, 66, 16, 25, 25, 17, 10, 19, 12, 16, 11, 0, + 14, 12, 23, 89, 15, 28, 25, 50, 62, 
35, 42, 59, 60, 38, 52, 51, 62, 62, + 36, 43, 32, 41, 54, 43, 58, 62, 62, 35, 32, 51, 38, 62, 7, 62, 62, 62, + 62, 62, 56, 53, 52, 47, 42, 37, 31, 24, 19, 1, 75, 73, 83, 18, 19, 16, + 18, 6, 6, 5, 17, 66, 71, 68, 81, 74, 108, 94, 4, 8, 10, 1, 68, 73, + 77, 79, 101, 85, 27, 14, 8, 66, 65, 78, 88, 92, 101, 67, 27, 19, 14, 12, + 3, 0, 66, 70, 79, 67, 36, 31, 24, 8, 10, 65, 73, 83, 9, 54, 48, 43, + 32, 24, 3, 69, 69, 75, 62, 110, 106, 95, 106, 105, 92, 100, 96, 89, 97, 95, + 97, 87, 89, 89, 98, 93, 84, 78, 79, 78, 75, 71, 67, 67, 1, 69, 69, 74, + 0, 1, 65, 1, 1, 1, 2, 5, 1, 0, 1, 6, 65, 69, 73, 69, 65, 70, + 14, 16, 15, 18, 13, 15, 14, 13, 13, 4, 11, 9, 68, 0, 55, 56, 53, 56, + 56, 62, 61, 62, 62, 62, 62, 62, 61, 50, 30, 62, 62, 62, 62, 62, 59, 43, + 36, 34, 27, 16, 9, 64, 73, 62, 62, 62, 62, 62, 62, 57, 56, 47, 43, 38, + 34, 20, 12, 1, 6, 6, 64, 25, 24, 22, 19, 16, 17, 9, 8, 6, 0, 79, + 69, 81, 91, 13, 64, 69, 81, 76, 69, 75, 67, 4, 71, 75, 64, 3, 3, 3, + 8, 4, 3, 61, 52, 41, 32, 24, 12, 2, 76, 92, 67, 39, 28, 23, 13, 13, + 5, 1, 64, 76, 78, 73, 66, 73, 0, 6, 67, 71, 1, 5, 6, 5, 10, 9, + 4, 61, 52, 41, 32, 24, 12, 2, 76, 92, 75, 66, 3, 1, 1, 86, 79}, + + { + + 62, 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 21, 36, 50, 56, 19, 94, 66, + 66, 68, 7, 6, 67, 1, 8, 26, 31, 15, 69, 90, 90, 88, 72, 66, 68, 7, + 73, 79, 11, 14, 67, 77, 87, 3, 76, 84, 97, 7, 71, 80, 4, 78, 80, 90, + 65, 76, 70, 84, 67, 2, 22, 0, 0, 0, 75, 89, 97, 66, 12, 64, 38, 7, + 81, 95, 76, 82, 4, 69, 82, 78, 97, 79, 92, 87, 97, 80, 84, 88, 88, 24, + 65, 71, 98, 71, 94, 83, 101, 6, 2, 3, 67, 13, 68, 68, 80, 92, 80, 81, + 80, 69, 77, 91, 85, 69, 95, 85, 0, 70, 86, 89, 1, 73, 67, 65, 3, 9, + 64, 70, 82, 91, 72, 17, 17, 91, 9, 7, 5, 4, 5, 3, 0, 1, 3, 7, + 5, 2, 2, 2, 69, 67, 5, 66, 15, 24, 24, 16, 9, 19, 12, 16, 10, 1, + 13, 12, 22, 89, 15, 27, 24, 48, 62, 34, 41, 57, 58, 36, 50, 49, 62, 62, + 33, 41, 30, 38, 51, 41, 55, 62, 62, 33, 30, 48, 36, 62, 6, 62, 62, 62, + 61, 60, 54, 51, 50, 45, 39, 35, 29, 23, 17, 64, 
75, 73, 83, 17, 18, 15, + 16, 5, 5, 4, 16, 67, 71, 69, 81, 75, 107, 93, 5, 8, 10, 2, 68, 72, + 76, 78, 99, 83, 27, 15, 8, 66, 64, 77, 86, 90, 99, 67, 28, 20, 15, 13, + 4, 0, 65, 69, 78, 67, 37, 31, 24, 7, 10, 65, 73, 82, 9, 54, 47, 42, + 31, 24, 3, 68, 69, 74, 62, 108, 105, 93, 104, 103, 91, 99, 95, 88, 95, 94, + 95, 86, 88, 88, 97, 91, 84, 78, 78, 77, 74, 71, 68, 67, 0, 69, 69, 74, + 0, 1, 64, 1, 1, 1, 2, 5, 1, 0, 1, 5, 65, 69, 73, 69, 64, 71, + 13, 16, 14, 18, 13, 14, 14, 13, 12, 4, 11, 8, 68, 64, 54, 55, 52, 54, + 54, 62, 59, 61, 62, 59, 62, 62, 58, 47, 28, 62, 62, 62, 62, 59, 56, 40, + 34, 32, 25, 15, 8, 64, 73, 62, 62, 62, 62, 59, 59, 55, 53, 45, 41, 36, + 31, 18, 10, 64, 6, 6, 66, 24, 23, 21, 17, 14, 15, 8, 7, 4, 64, 79, + 71, 82, 91, 11, 66, 71, 80, 76, 68, 75, 66, 5, 70, 74, 0, 4, 3, 4, + 9, 4, 3, 60, 50, 38, 29, 20, 8, 65, 80, 95, 66, 39, 28, 23, 13, 14, + 5, 2, 0, 76, 78, 73, 66, 73, 1, 7, 67, 71, 2, 6, 6, 6, 11, 9, + 4, 60, 50, 38, 29, 20, 8, 65, 80, 95, 75, 66, 3, 1, 1, 85, 77}, + + { + + 61, 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 19, 34, 49, 56, 19, 92, 66, + 65, 67, 7, 6, 67, 1, 7, 25, 28, 12, 71, 91, 87, 87, 72, 65, 67, 7, + 74, 78, 11, 13, 67, 77, 87, 3, 76, 84, 97, 7, 71, 79, 4, 78, 79, 90, + 65, 76, 70, 84, 67, 2, 22, 0, 0, 0, 75, 89, 97, 65, 11, 64, 37, 6, + 81, 92, 75, 81, 5, 67, 81, 76, 95, 78, 91, 86, 95, 80, 84, 87, 88, 24, + 65, 70, 96, 71, 93, 83, 99, 6, 1, 2, 68, 13, 68, 67, 80, 91, 79, 81, + 79, 69, 76, 89, 84, 69, 94, 84, 5, 65, 83, 86, 1, 73, 67, 65, 3, 9, + 0, 69, 82, 89, 72, 15, 15, 88, 9, 7, 5, 4, 5, 3, 1, 2, 3, 7, + 5, 2, 2, 3, 70, 67, 5, 67, 15, 23, 24, 16, 9, 19, 12, 16, 9, 1, + 12, 11, 22, 89, 15, 27, 24, 46, 61, 33, 39, 55, 55, 34, 47, 47, 62, 62, + 29, 39, 28, 36, 48, 38, 52, 61, 62, 30, 28, 44, 34, 62, 4, 60, 62, 60, + 58, 57, 52, 49, 48, 43, 37, 33, 27, 21, 16, 66, 75, 73, 84, 16, 17, 14, + 15, 4, 4, 3, 14, 68, 72, 70, 82, 75, 107, 91, 6, 9, 10, 2, 67, 72, + 76, 78, 97, 82, 27, 15, 9, 65, 0, 76, 85, 89, 97, 66, 28, 20, 15, 13, 
+ 5, 1, 65, 69, 78, 67, 37, 31, 23, 7, 10, 65, 73, 82, 9, 53, 47, 41, + 30, 24, 3, 68, 69, 74, 62, 107, 103, 92, 103, 102, 90, 97, 93, 87, 94, 92, + 93, 86, 87, 88, 96, 89, 83, 78, 78, 77, 74, 71, 68, 67, 0, 69, 69, 75, + 1, 2, 0, 1, 1, 2, 2, 5, 1, 0, 1, 5, 65, 69, 73, 69, 64, 72, + 12, 16, 13, 17, 12, 14, 14, 12, 11, 4, 11, 7, 68, 65, 53, 54, 51, 53, + 52, 60, 57, 59, 59, 57, 62, 60, 55, 45, 26, 62, 62, 62, 62, 55, 53, 38, + 32, 29, 23, 13, 6, 65, 74, 62, 62, 62, 60, 56, 57, 52, 50, 42, 38, 33, + 28, 16, 8, 65, 5, 5, 67, 22, 21, 19, 15, 13, 13, 6, 6, 3, 65, 80, + 72, 83, 92, 10, 67, 72, 80, 75, 68, 74, 65, 6, 70, 74, 0, 4, 3, 4, + 10, 4, 3, 59, 48, 36, 26, 17, 4, 69, 84, 98, 66, 39, 28, 23, 13, 14, + 6, 2, 0, 75, 78, 73, 65, 72, 2, 8, 67, 70, 2, 6, 6, 6, 11, 9, + 4, 59, 48, 36, 26, 17, 4, 69, 84, 98, 75, 66, 3, 2, 2, 83, 75}, + + { + + 60, 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 18, 32, 48, 56, 19, 90, 66, + 65, 66, 7, 5, 67, 0, 5, 24, 26, 8, 74, 93, 84, 86, 72, 65, 66, 7, + 74, 77, 11, 13, 67, 77, 86, 3, 76, 84, 97, 7, 71, 79, 4, 78, 79, 90, + 65, 76, 70, 84, 67, 2, 22, 0, 0, 0, 74, 89, 97, 65, 10, 64, 36, 6, + 81, 90, 74, 80, 7, 65, 79, 75, 93, 76, 90, 85, 93, 80, 84, 87, 88, 24, + 65, 69, 94, 71, 92, 82, 97, 6, 1, 2, 68, 13, 68, 67, 79, 90, 78, 80, + 78, 68, 75, 88, 82, 68, 93, 83, 10, 2, 80, 83, 1, 73, 67, 64, 3, 9, + 0, 69, 81, 87, 72, 13, 13, 85, 9, 7, 5, 4, 5, 4, 3, 3, 3, 7, + 5, 2, 3, 4, 71, 67, 5, 67, 14, 22, 23, 16, 8, 19, 12, 16, 8, 1, + 11, 10, 21, 89, 15, 26, 23, 44, 58, 32, 37, 53, 53, 32, 45, 45, 62, 62, + 26, 37, 26, 34, 45, 36, 49, 57, 62, 27, 26, 41, 32, 62, 2, 58, 62, 58, + 56, 55, 50, 47, 46, 41, 35, 31, 25, 19, 14, 68, 75, 73, 84, 15, 16, 13, + 14, 3, 3, 2, 12, 69, 72, 71, 83, 76, 106, 90, 7, 9, 10, 2, 66, 71, + 75, 77, 95, 81, 27, 15, 9, 64, 1, 75, 83, 87, 95, 65, 29, 21, 16, 14, + 6, 1, 65, 69, 77, 67, 38, 31, 22, 7, 10, 65, 73, 81, 9, 53, 46, 40, + 29, 24, 3, 68, 69, 74, 62, 106, 102, 91, 101, 100, 89, 95, 91, 86, 92, 90, + 91, 85, 86, 87, 
95, 87, 83, 78, 78, 76, 73, 71, 68, 67, 64, 69, 69, 75, + 1, 2, 1, 1, 1, 2, 2, 5, 1, 0, 1, 4, 65, 69, 73, 69, 0, 73, + 11, 16, 12, 16, 11, 13, 14, 11, 10, 4, 11, 6, 68, 66, 52, 53, 50, 51, + 50, 58, 55, 57, 57, 54, 61, 57, 52, 42, 24, 62, 62, 62, 62, 52, 50, 35, + 30, 27, 21, 11, 5, 66, 75, 62, 62, 62, 58, 53, 54, 50, 47, 40, 36, 31, + 25, 14, 6, 67, 4, 4, 69, 21, 20, 17, 13, 11, 11, 4, 5, 1, 66, 81, + 73, 84, 93, 8, 68, 73, 80, 75, 67, 74, 64, 7, 70, 73, 1, 5, 3, 5, + 11, 4, 3, 58, 46, 33, 23, 14, 0, 73, 88, 101, 66, 39, 28, 23, 13, 15, + 6, 2, 0, 75, 78, 73, 65, 72, 3, 9, 67, 70, 2, 6, 6, 6, 12, 9, + 4, 58, 46, 33, 23, 14, 0, 73, 88, 101, 75, 66, 3, 2, 2, 82, 73}, + + { + + 58, 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 16, 29, 47, 56, 18, 89, 67, + 65, 66, 6, 4, 67, 64, 3, 22, 23, 4, 77, 95, 82, 86, 72, 65, 66, 6, + 75, 77, 11, 12, 67, 77, 86, 2, 77, 84, 97, 6, 71, 79, 4, 78, 79, 90, + 65, 76, 71, 84, 67, 2, 22, 0, 0, 0, 74, 90, 97, 65, 9, 65, 35, 5, + 82, 88, 73, 79, 8, 0, 78, 74, 92, 75, 89, 84, 91, 80, 84, 87, 88, 24, + 65, 69, 92, 71, 92, 82, 96, 5, 0, 1, 69, 13, 68, 67, 79, 90, 78, 80, + 78, 68, 75, 87, 81, 68, 92, 82, 14, 6, 77, 81, 1, 73, 67, 64, 2, 9, + 0, 69, 81, 85, 72, 11, 11, 83, 9, 7, 5, 4, 5, 4, 4, 3, 3, 6, + 4, 2, 3, 4, 73, 67, 5, 68, 13, 20, 22, 15, 7, 19, 12, 15, 6, 1, + 10, 9, 20, 89, 14, 25, 22, 41, 54, 30, 35, 50, 50, 29, 42, 43, 55, 62, + 22, 34, 24, 31, 41, 33, 45, 52, 59, 24, 24, 37, 30, 62, 0, 55, 59, 55, + 53, 52, 47, 44, 43, 39, 32, 28, 23, 17, 12, 70, 75, 74, 85, 14, 14, 11, + 12, 1, 1, 0, 10, 70, 73, 72, 84, 77, 106, 89, 7, 9, 10, 2, 66, 71, + 75, 77, 93, 80, 27, 15, 9, 64, 1, 74, 82, 86, 94, 65, 29, 21, 16, 14, + 7, 1, 65, 69, 77, 68, 38, 30, 21, 6, 10, 65, 73, 81, 8, 52, 45, 38, + 28, 23, 3, 68, 69, 74, 62, 105, 101, 90, 100, 99, 88, 94, 90, 85, 91, 89, + 89, 85, 86, 87, 94, 86, 83, 78, 78, 76, 73, 71, 69, 68, 65, 69, 70, 76, + 1, 2, 2, 1, 0, 2, 2, 4, 1, 0, 1, 3, 65, 69, 73, 69, 0, 74, + 10, 16, 10, 15, 10, 12, 13, 10, 9, 4, 10, 4, 69, 
68, 50, 51, 49, 49, + 48, 55, 52, 54, 54, 51, 58, 54, 48, 39, 22, 62, 62, 61, 60, 48, 46, 32, + 27, 24, 19, 9, 3, 67, 76, 59, 60, 60, 55, 50, 51, 47, 43, 37, 33, 28, + 22, 12, 4, 69, 3, 3, 71, 19, 18, 15, 10, 9, 9, 2, 3, 64, 68, 82, + 75, 85, 94, 6, 70, 75, 80, 75, 67, 74, 0, 8, 70, 73, 1, 5, 3, 5, + 11, 4, 2, 56, 44, 30, 19, 10, 67, 78, 93, 104, 66, 39, 28, 23, 13, 15, + 6, 2, 0, 75, 78, 73, 65, 72, 3, 9, 67, 70, 2, 6, 6, 6, 12, 8, + 3, 56, 44, 30, 19, 10, 67, 78, 93, 104, 75, 67, 2, 2, 2, 81, 71}, + + { + + 57, 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 15, 27, 46, 57, 18, 87, 67, 64, + 65, 6, 4, 66, 64, 2, 21, 21, 1, 79, 96, 79, 85, 71, 64, 65, 6, 75, 76, + 11, 12, 66, 76, 85, 2, 77, 83, 96, 6, 70, 78, 4, 77, 78, 89, 64, 75, 71, + 83, 66, 2, 22, 0, 0, 0, 73, 90, 97, 64, 9, 65, 35, 5, 82, 85, 71, 77, + 10, 3, 76, 72, 90, 73, 87, 82, 88, 80, 83, 86, 87, 24, 65, 68, 89, 70, 91, + 81, 94, 5, 0, 1, 69, 14, 68, 66, 78, 89, 77, 79, 77, 67, 74, 85, 79, 67, + 90, 80, 19, 11, 73, 78, 2, 72, 66, 0, 2, 10, 1, 68, 80, 82, 71, 9, 10, + 80, 9, 8, 5, 5, 6, 5, 6, 4, 4, 6, 4, 2, 4, 5, 74, 66, 5, 68, + 13, 19, 22, 15, 7, 19, 12, 15, 5, 2, 10, 9, 20, 89, 14, 25, 22, 39, 51, + 29, 34, 48, 48, 27, 40, 41, 49, 62, 19, 32, 23, 29, 38, 31, 42, 48, 55, 22, + 22, 34, 28, 62, 64, 53, 57, 53, 51, 50, 45, 42, 41, 37, 30, 26, 22, 16, 11, + 71, 75, 74, 85, 14, 13, 10, 11, 0, 0, 64, 9, 71, 73, 73, 84, 77, 105, 87, + 8, 10, 10, 3, 65, 70, 74, 76, 90, 78, 28, 16, 10, 0, 2, 72, 80, 84, 92, + 64, 30, 22, 17, 15, 8, 2, 64, 68, 76, 68, 39, 30, 21, 6, 11, 64, 73, 80, + 8, 52, 45, 37, 27, 23, 4, 67, 68, 73, 62, 103, 99, 88, 98, 97, 86, 92, 88, + 83, 89, 87, 86, 84, 85, 86, 92, 84, 82, 77, 77, 75, 72, 70, 69, 68, 65, 69, + 70, 76, 2, 3, 3, 2, 0, 3, 3, 4, 2, 1, 1, 3, 64, 68, 72, 68, 1, + 74, 9, 16, 9, 15, 10, 12, 13, 10, 9, 4, 10, 3, 69, 69, 49, 50, 49, 48, + 47, 53, 50, 52, 52, 49, 56, 52, 45, 37, 20, 61, 60, 57, 56, 45, 43, 30, 25, + 22, 18, 8, 2, 67, 76, 57, 58, 58, 53, 48, 49, 45, 40, 35, 31, 26, 20, 11, + 
3, 70, 3, 3, 72, 18, 17, 14, 8, 8, 8, 1, 2, 65, 69, 82, 76, 85, 94, + 5, 71, 76, 79, 74, 66, 73, 2, 10, 69, 72, 2, 6, 4, 6, 12, 4, 2, 55, + 42, 28, 16, 7, 71, 82, 97, 106, 65, 39, 29, 24, 14, 16, 7, 3, 1, 74, 77, + 72, 64, 71, 4, 10, 66, 69, 3, 7, 6, 7, 13, 8, 3, 55, 42, 28, 16, 7, + 71, 82, 97, 106, 75, 67, 2, 3, 3, 79, 68}, + + { + + 56, 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 14, 25, 45, 57, 18, 85, 67, + 64, 64, 6, 3, 66, 65, 0, 20, 18, 66, 82, 98, 76, 84, 71, 64, 64, 6, + 75, 75, 11, 11, 66, 76, 84, 2, 77, 83, 96, 6, 70, 78, 4, 77, 78, 89, + 64, 75, 71, 83, 66, 2, 22, 0, 0, 0, 72, 90, 97, 64, 8, 65, 34, 5, + 82, 83, 70, 76, 12, 5, 75, 71, 88, 72, 86, 81, 86, 80, 83, 86, 87, 24, + 65, 67, 87, 70, 90, 80, 92, 5, 0, 1, 70, 14, 68, 66, 77, 88, 76, 78, + 76, 67, 73, 84, 78, 66, 89, 79, 24, 15, 70, 75, 2, 72, 66, 0, 2, 10, + 1, 68, 80, 80, 71, 7, 8, 77, 9, 8, 5, 5, 6, 5, 8, 5, 4, 6, + 4, 2, 5, 6, 75, 66, 5, 68, 12, 18, 21, 15, 6, 19, 12, 15, 4, 2, + 9, 8, 19, 89, 14, 24, 21, 37, 48, 28, 32, 46, 46, 25, 38, 39, 43, 62, + 15, 30, 21, 27, 35, 29, 39, 44, 51, 19, 20, 31, 26, 62, 66, 51, 55, 51, + 49, 48, 43, 40, 39, 35, 28, 24, 20, 14, 9, 73, 75, 74, 86, 13, 12, 9, + 10, 64, 64, 65, 7, 72, 73, 74, 85, 78, 104, 86, 9, 10, 10, 3, 64, 69, + 73, 75, 88, 77, 28, 16, 10, 1, 3, 71, 78, 82, 90, 0, 31, 23, 18, 16, + 9, 2, 64, 68, 75, 68, 40, 30, 20, 6, 11, 64, 73, 80, 8, 52, 44, 36, + 26, 23, 4, 67, 68, 73, 62, 102, 98, 87, 96, 95, 85, 90, 86, 82, 87, 85, + 84, 83, 84, 86, 91, 82, 82, 77, 77, 74, 72, 70, 69, 68, 66, 69, 70, 76, + 2, 3, 4, 2, 0, 3, 3, 4, 2, 1, 1, 2, 64, 68, 72, 68, 2, 75, + 8, 16, 8, 14, 9, 11, 13, 9, 8, 4, 10, 2, 69, 70, 48, 49, 48, 46, + 45, 51, 48, 50, 50, 46, 53, 49, 42, 34, 18, 57, 56, 53, 51, 42, 40, 27, + 23, 19, 16, 6, 1, 68, 77, 55, 56, 55, 51, 45, 46, 42, 37, 33, 28, 24, + 17, 9, 1, 72, 2, 2, 74, 17, 16, 12, 6, 6, 6, 64, 1, 67, 70, 83, + 77, 86, 95, 3, 72, 77, 79, 74, 65, 73, 3, 11, 69, 71, 3, 7, 4, 6, + 13, 4, 2, 54, 40, 25, 13, 4, 75, 86, 101, 109, 65, 
39, 29, 24, 14, 17, + 7, 3, 1, 74, 77, 72, 64, 71, 5, 11, 66, 69, 3, 7, 6, 7, 14, 8, + 3, 54, 40, 25, 13, 4, 75, 86, 101, 109, 75, 67, 2, 3, 3, 78, 66}, + + { + + 55, 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 12, 23, 44, 57, 18, 83, 67, + 0, 0, 6, 3, 66, 65, 64, 19, 16, 69, 84, 99, 73, 83, 71, 0, 0, 6, + 76, 74, 11, 11, 66, 76, 84, 2, 77, 83, 96, 6, 70, 77, 4, 77, 77, 89, + 64, 75, 71, 83, 66, 2, 22, 0, 0, 0, 72, 90, 97, 0, 7, 65, 33, 4, + 82, 80, 69, 75, 13, 7, 73, 69, 86, 70, 85, 80, 84, 80, 83, 85, 87, 24, + 65, 66, 85, 70, 89, 80, 90, 5, 64, 0, 70, 14, 68, 65, 77, 87, 75, 78, + 75, 66, 72, 82, 76, 66, 88, 78, 29, 20, 67, 72, 2, 72, 66, 1, 2, 10, + 2, 67, 79, 78, 71, 5, 6, 74, 9, 8, 5, 5, 6, 6, 9, 6, 4, 6, + 4, 2, 5, 7, 76, 66, 5, 69, 12, 17, 21, 15, 6, 19, 12, 15, 3, 2, + 8, 7, 19, 89, 14, 24, 21, 35, 45, 27, 30, 44, 43, 23, 35, 37, 36, 62, + 12, 28, 19, 25, 32, 26, 36, 40, 47, 16, 18, 27, 24, 62, 68, 49, 53, 49, + 46, 45, 41, 38, 37, 33, 26, 22, 18, 12, 8, 75, 75, 74, 86, 12, 11, 8, + 9, 65, 65, 66, 5, 73, 74, 75, 86, 78, 104, 84, 10, 11, 10, 3, 0, 69, + 73, 75, 86, 76, 28, 16, 11, 2, 4, 70, 77, 81, 88, 1, 31, 23, 18, 16, + 10, 3, 64, 68, 75, 68, 40, 30, 19, 6, 11, 64, 73, 79, 8, 51, 44, 35, + 25, 23, 4, 67, 68, 73, 62, 101, 96, 86, 95, 94, 84, 88, 84, 81, 86, 83, + 82, 83, 83, 85, 90, 80, 81, 77, 77, 74, 71, 70, 69, 68, 66, 69, 70, 77, + 3, 4, 5, 2, 0, 4, 3, 4, 2, 1, 1, 2, 64, 68, 72, 68, 2, 76, + 7, 16, 7, 13, 8, 11, 13, 8, 7, 4, 10, 1, 69, 71, 47, 48, 47, 45, + 43, 49, 46, 48, 47, 44, 50, 46, 39, 32, 16, 53, 52, 49, 46, 38, 37, 25, + 21, 17, 14, 4, 64, 69, 78, 53, 53, 53, 48, 42, 44, 40, 34, 30, 26, 21, + 14, 7, 64, 73, 1, 1, 75, 15, 14, 10, 4, 5, 4, 66, 0, 68, 71, 84, + 78, 87, 96, 2, 73, 78, 79, 73, 65, 72, 4, 12, 69, 71, 3, 7, 4, 7, + 14, 4, 2, 53, 38, 23, 10, 1, 79, 90, 105, 112, 65, 39, 29, 24, 14, 17, + 8, 3, 1, 73, 77, 72, 0, 70, 6, 12, 66, 68, 3, 7, 6, 7, 14, 8, + 3, 53, 38, 23, 10, 1, 79, 90, 105, 112, 75, 67, 2, 4, 4, 76, 64}, + + { + + 53, 7, 77, 53, 
7, 77, 92, 75, 18, 11, 4, 11, 21, 43, 57, 17, 81, 67, + 0, 1, 5, 2, 66, 66, 66, 18, 13, 73, 87, 101, 70, 82, 71, 0, 1, 5, + 76, 73, 11, 10, 66, 75, 83, 2, 77, 83, 96, 6, 70, 77, 4, 77, 77, 88, + 64, 75, 71, 83, 65, 2, 22, 0, 0, 0, 71, 91, 97, 0, 6, 65, 32, 4, + 82, 78, 68, 74, 15, 9, 72, 68, 85, 69, 83, 79, 82, 80, 82, 85, 87, 24, + 65, 65, 83, 70, 89, 79, 88, 5, 64, 0, 71, 14, 68, 65, 76, 86, 74, 77, + 74, 66, 72, 81, 75, 65, 87, 77, 34, 24, 64, 69, 2, 71, 66, 1, 2, 10, + 2, 67, 79, 76, 71, 3, 4, 72, 9, 9, 5, 5, 6, 6, 11, 7, 5, 5, + 4, 2, 6, 7, 77, 66, 5, 69, 11, 16, 20, 14, 5, 19, 12, 15, 2, 2, + 7, 6, 18, 89, 13, 23, 20, 33, 41, 26, 28, 42, 41, 21, 33, 35, 30, 62, + 8, 26, 17, 22, 29, 24, 32, 35, 43, 13, 16, 24, 22, 62, 69, 47, 51, 46, + 44, 43, 39, 36, 35, 31, 23, 20, 16, 10, 6, 77, 75, 74, 87, 11, 10, 7, + 7, 66, 67, 67, 4, 74, 74, 76, 86, 79, 103, 83, 11, 11, 10, 4, 0, 68, + 72, 74, 84, 74, 28, 17, 11, 2, 5, 69, 75, 79, 87, 1, 32, 24, 19, 17, + 11, 3, 64, 67, 74, 68, 41, 30, 19, 5, 11, 64, 73, 79, 8, 51, 43, 34, + 24, 23, 4, 66, 68, 73, 62, 99, 95, 85, 93, 92, 83, 87, 83, 80, 84, 82, + 80, 82, 82, 85, 89, 79, 81, 77, 77, 73, 71, 70, 70, 68, 67, 69, 70, 77, + 3, 4, 6, 2, 0, 4, 3, 4, 2, 1, 1, 1, 64, 68, 72, 68, 3, 77, + 6, 16, 6, 13, 7, 10, 13, 7, 6, 4, 10, 0, 69, 72, 46, 47, 46, 43, + 41, 47, 44, 45, 45, 41, 47, 43, 36, 29, 14, 48, 48, 45, 41, 35, 33, 22, + 18, 14, 12, 2, 65, 70, 78, 50, 51, 50, 46, 39, 41, 37, 31, 28, 23, 19, + 11, 5, 66, 75, 0, 1, 77, 14, 13, 9, 2, 3, 2, 67, 64, 70, 72, 85, + 80, 88, 96, 0, 75, 80, 78, 73, 64, 72, 5, 13, 69, 70, 4, 8, 4, 7, + 15, 4, 2, 52, 36, 20, 7, 66, 83, 95, 109, 115, 64, 39, 29, 24, 14, 18, + 8, 3, 1, 73, 77, 72, 0, 70, 6, 13, 66, 68, 4, 8, 6, 7, 15, 8, + 3, 52, 36, 20, 7, 66, 83, 95, 109, 115, 75, 67, 2, 4, 4, 75, 1}, + + { + + 52, 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 10, 19, 42, 57, 17, 79, 67, + 0, 2, 5, 2, 66, 67, 68, 17, 11, 76, 89, 103, 67, 81, 70, 0, 2, 5, + 76, 72, 11, 10, 66, 75, 82, 2, 77, 83, 96, 6, 70, 77, 
4, 77, 76, 88, + 64, 75, 71, 83, 65, 2, 22, 0, 0, 0, 71, 91, 97, 1, 5, 65, 31, 3, + 82, 75, 67, 73, 17, 11, 70, 66, 83, 67, 82, 78, 79, 80, 82, 84, 86, 24, + 65, 64, 81, 70, 88, 79, 86, 5, 65, 64, 71, 14, 68, 64, 76, 85, 73, 77, + 73, 65, 71, 79, 73, 64, 85, 76, 39, 28, 2, 66, 3, 71, 65, 2, 2, 10, + 3, 67, 78, 74, 71, 1, 2, 69, 9, 9, 5, 5, 6, 7, 12, 8, 5, 5, + 4, 2, 6, 8, 78, 65, 5, 70, 11, 15, 20, 14, 5, 19, 12, 15, 1, 3, + 6, 6, 18, 89, 13, 22, 19, 31, 38, 25, 27, 40, 39, 19, 30, 33, 24, 62, + 5, 24, 15, 20, 26, 21, 29, 31, 39, 11, 14, 20, 20, 62, 71, 45, 49, 44, + 42, 41, 37, 34, 33, 29, 21, 18, 14, 9, 4, 79, 75, 74, 87, 10, 9, 6, + 6, 67, 68, 68, 2, 75, 75, 77, 87, 80, 102, 81, 12, 12, 10, 4, 1, 68, + 72, 73, 82, 73, 28, 17, 12, 3, 6, 68, 74, 78, 85, 2, 33, 25, 20, 17, + 12, 4, 0, 67, 74, 68, 41, 30, 18, 5, 11, 64, 73, 78, 8, 50, 42, 33, + 23, 23, 4, 66, 68, 72, 62, 98, 93, 83, 91, 91, 82, 85, 81, 79, 82, 80, + 78, 81, 81, 84, 88, 77, 81, 77, 76, 73, 70, 70, 70, 68, 67, 69, 70, 78, + 4, 4, 7, 2, 0, 4, 3, 4, 2, 1, 1, 1, 64, 68, 72, 68, 3, 78, + 5, 16, 5, 12, 7, 9, 13, 7, 5, 4, 10, 64, 69, 73, 45, 46, 45, 41, + 39, 45, 42, 43, 42, 38, 44, 40, 33, 27, 12, 44, 44, 41, 36, 32, 30, 20, + 16, 12, 10, 1, 67, 70, 79, 48, 48, 48, 44, 36, 38, 35, 28, 26, 21, 17, + 8, 3, 68, 76, 0, 0, 79, 13, 11, 7, 0, 1, 0, 69, 65, 71, 73, 85, + 81, 89, 97, 64, 76, 81, 78, 73, 0, 71, 6, 14, 68, 69, 4, 8, 4, 8, + 16, 4, 2, 51, 34, 17, 4, 69, 87, 99, 113, 118, 64, 39, 29, 24, 14, 18, + 8, 4, 2, 72, 77, 72, 1, 70, 7, 14, 66, 68, 4, 8, 6, 8, 15, 8, + 3, 51, 34, 17, 4, 69, 87, 99, 113, 118, 75, 67, 2, 4, 4, 73, 3}, + + { + + 51, 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 8, 17, 41, 57, 17, 78, 67, + 1, 2, 5, 1, 65, 67, 69, 15, 8, 80, 92, 104, 65, 80, 70, 1, 2, 5, + 77, 72, 11, 9, 66, 75, 82, 2, 77, 82, 95, 6, 69, 76, 4, 76, 76, 88, + 64, 75, 71, 83, 65, 2, 22, 0, 0, 0, 70, 91, 97, 1, 5, 66, 31, 3, + 82, 73, 66, 72, 18, 14, 69, 65, 81, 66, 81, 77, 77, 80, 82, 84, 86, 24, + 65, 0, 78, 70, 87, 78, 84, 
4, 65, 64, 72, 15, 68, 64, 75, 85, 73, 76, + 72, 65, 70, 78, 72, 64, 84, 75, 44, 33, 5, 0, 3, 71, 65, 2, 2, 10, + 3, 66, 78, 72, 70, 64, 0, 66, 9, 9, 5, 5, 7, 7, 14, 9, 5, 5, + 4, 2, 7, 9, 79, 65, 5, 70, 10, 14, 19, 14, 4, 19, 12, 15, 64, 3, + 5, 5, 17, 89, 13, 22, 19, 29, 35, 24, 25, 37, 36, 17, 28, 31, 17, 62, + 1, 22, 14, 18, 22, 19, 26, 27, 34, 8, 12, 17, 18, 62, 73, 43, 47, 42, + 39, 38, 35, 31, 31, 27, 19, 15, 12, 7, 3, 80, 75, 74, 88, 9, 8, 5, + 5, 68, 69, 69, 0, 76, 75, 78, 88, 80, 102, 80, 13, 12, 10, 4, 2, 67, + 71, 73, 80, 72, 29, 17, 12, 4, 7, 67, 72, 76, 83, 3, 33, 25, 20, 18, + 13, 4, 0, 67, 73, 68, 42, 30, 17, 5, 11, 64, 73, 78, 7, 50, 42, 32, + 22, 22, 4, 66, 68, 72, 62, 97, 92, 82, 90, 89, 81, 83, 79, 78, 81, 78, + 76, 81, 80, 84, 87, 75, 80, 77, 76, 72, 70, 70, 70, 68, 68, 69, 70, 78, + 4, 5, 8, 2, 0, 5, 4, 4, 3, 2, 1, 0, 64, 68, 72, 68, 4, 79, + 4, 16, 4, 11, 6, 9, 13, 6, 5, 4, 9, 66, 70, 74, 43, 45, 44, 40, + 37, 43, 40, 41, 40, 36, 41, 38, 30, 24, 10, 40, 40, 37, 32, 28, 27, 17, + 14, 9, 8, 64, 68, 71, 80, 46, 46, 45, 41, 33, 36, 32, 25, 23, 18, 14, + 5, 1, 69, 78, 64, 64, 80, 11, 10, 5, 65, 0, 65, 71, 66, 73, 74, 86, + 82, 90, 98, 66, 77, 82, 78, 72, 0, 71, 7, 15, 68, 69, 5, 9, 4, 8, + 17, 4, 2, 50, 32, 15, 1, 72, 91, 103, 117, 121, 64, 39, 29, 24, 14, 19, + 9, 4, 2, 72, 76, 71, 1, 69, 8, 15, 65, 67, 4, 8, 6, 8, 16, 8, + 3, 50, 32, 15, 1, 72, 91, 103, 117, 121, 75, 67, 2, 5, 5, 72, 6}, + + { + + 50, 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 7, 15, 40, 57, 17, 76, 67, + 1, 3, 4, 1, 65, 68, 71, 14, 6, 83, 94, 106, 1, 79, 70, 1, 3, 4, + 77, 71, 11, 9, 66, 74, 81, 2, 77, 82, 95, 6, 69, 76, 4, 76, 75, 87, + 64, 75, 71, 83, 64, 2, 22, 0, 0, 0, 70, 91, 97, 2, 4, 66, 30, 2, + 82, 70, 65, 71, 20, 16, 67, 0, 80, 64, 79, 76, 75, 80, 81, 83, 86, 24, + 65, 1, 76, 70, 86, 78, 82, 4, 66, 65, 72, 15, 68, 0, 75, 84, 72, 76, + 71, 64, 69, 76, 70, 0, 83, 74, 49, 37, 8, 3, 3, 70, 65, 3, 2, 10, + 4, 66, 77, 70, 70, 66, 65, 0, 9, 10, 5, 5, 7, 8, 15, 10, 6, 4, + 4, 2, 7, 
9, 80, 65, 5, 71, 10, 13, 19, 13, 4, 19, 12, 15, 65, 3, + 4, 4, 17, 89, 13, 21, 18, 27, 32, 23, 23, 35, 34, 15, 25, 29, 11, 62, + 65, 20, 12, 15, 19, 16, 23, 22, 30, 5, 10, 13, 16, 62, 74, 41, 45, 40, + 37, 36, 33, 29, 29, 25, 16, 13, 10, 5, 1, 82, 75, 74, 88, 8, 7, 4, + 3, 69, 70, 70, 64, 77, 76, 79, 88, 81, 101, 78, 14, 13, 10, 5, 2, 67, + 71, 72, 78, 70, 29, 18, 13, 4, 8, 66, 71, 75, 81, 3, 34, 26, 21, 18, + 14, 5, 0, 66, 73, 68, 42, 30, 17, 4, 11, 64, 73, 77, 7, 49, 41, 31, + 21, 22, 4, 65, 68, 72, 62, 95, 90, 81, 88, 88, 80, 82, 78, 77, 79, 77, + 74, 80, 79, 83, 86, 73, 80, 77, 76, 72, 69, 70, 71, 68, 68, 69, 70, 79, + 5, 5, 9, 2, 0, 5, 4, 4, 3, 2, 1, 0, 64, 68, 72, 68, 4, 80, + 3, 16, 3, 11, 5, 8, 13, 5, 4, 4, 9, 67, 70, 75, 42, 44, 43, 38, + 35, 41, 38, 38, 37, 33, 38, 35, 27, 22, 8, 35, 36, 33, 27, 25, 24, 15, + 12, 7, 6, 66, 70, 72, 80, 43, 43, 43, 39, 30, 33, 30, 22, 21, 16, 12, + 2, 64, 71, 79, 65, 64, 82, 10, 8, 4, 67, 65, 67, 72, 67, 74, 75, 87, + 84, 91, 98, 67, 79, 84, 77, 72, 1, 70, 8, 16, 68, 68, 5, 9, 4, 9, + 18, 4, 2, 49, 30, 12, 65, 76, 95, 107, 121, 124, 0, 39, 29, 24, 14, 19, + 9, 4, 2, 71, 76, 71, 2, 69, 9, 16, 65, 67, 5, 9, 6, 8, 16, 8, + 3, 49, 30, 12, 65, 76, 95, 107, 121, 124, 75, 67, 2, 5, 5, 70, 8}, + + { + + 48, 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 5, 12, 39, 57, 16, 74, 68, + 1, 4, 4, 0, 65, 69, 73, 13, 3, 87, 97, 108, 4, 78, 70, 1, 4, 4, + 78, 70, 11, 8, 66, 74, 81, 1, 78, 82, 95, 6, 69, 76, 4, 76, 75, 87, + 64, 75, 71, 83, 64, 2, 22, 0, 0, 0, 69, 92, 97, 2, 3, 66, 29, 2, + 83, 68, 64, 70, 21, 18, 66, 1, 78, 0, 78, 75, 73, 80, 81, 83, 86, 24, + 65, 2, 74, 70, 86, 77, 80, 4, 66, 65, 73, 15, 68, 0, 74, 83, 71, 75, + 71, 64, 69, 75, 69, 0, 82, 73, 53, 41, 11, 5, 3, 70, 65, 3, 1, 10, + 4, 66, 77, 68, 70, 68, 67, 2, 9, 10, 5, 5, 7, 8, 17, 10, 6, 4, + 3, 2, 8, 10, 82, 65, 5, 71, 9, 11, 18, 13, 3, 19, 12, 14, 66, 3, + 3, 3, 16, 89, 12, 20, 17, 25, 28, 21, 21, 33, 31, 12, 23, 27, 4, 62, + 69, 18, 10, 13, 16, 14, 19, 18, 26, 2, 8, 10, 14, 62, 
76, 39, 42, 37, + 34, 33, 30, 27, 26, 23, 14, 11, 8, 3, 64, 84, 75, 75, 89, 7, 5, 3, + 2, 70, 72, 72, 66, 78, 76, 80, 89, 82, 101, 77, 15, 13, 10, 5, 3, 66, + 70, 72, 76, 69, 29, 18, 13, 5, 9, 65, 69, 73, 80, 4, 34, 26, 21, 19, + 15, 5, 0, 66, 72, 69, 43, 30, 16, 4, 11, 64, 73, 77, 7, 49, 40, 30, + 20, 22, 4, 65, 68, 72, 62, 94, 89, 80, 87, 86, 79, 80, 76, 76, 78, 75, + 72, 80, 79, 83, 85, 72, 80, 77, 76, 71, 69, 70, 71, 68, 69, 69, 70, 79, + 5, 5, 10, 2, 64, 5, 4, 3, 3, 2, 1, 64, 64, 68, 72, 68, 5, 81, + 2, 16, 1, 10, 4, 7, 12, 4, 3, 4, 9, 68, 70, 77, 41, 42, 42, 36, + 33, 39, 36, 36, 35, 30, 35, 32, 24, 19, 6, 31, 32, 28, 22, 21, 20, 12, + 9, 4, 4, 68, 71, 73, 81, 41, 41, 40, 36, 27, 30, 27, 19, 18, 13, 9, + 64, 66, 73, 81, 66, 65, 84, 8, 7, 2, 69, 67, 69, 74, 69, 76, 77, 88, + 85, 92, 99, 69, 80, 85, 77, 72, 1, 70, 9, 17, 68, 68, 6, 10, 4, 9, + 18, 4, 1, 48, 28, 9, 68, 79, 99, 112, 126, 126, 0, 39, 29, 24, 14, 20, + 9, 4, 2, 71, 76, 71, 2, 69, 9, 16, 65, 67, 5, 9, 6, 8, 17, 8, + 2, 48, 28, 9, 68, 79, 99, 112, 126, 126, 75, 68, 1, 5, 5, 69, 10}, + + { + + 47, 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 4, 10, 38, 58, 16, 72, 68, + 2, 5, 4, 64, 65, 69, 74, 12, 1, 91, 100, 109, 7, 77, 69, 2, 5, 4, + 78, 69, 11, 8, 65, 74, 80, 1, 78, 82, 95, 6, 69, 75, 4, 76, 75, 87, + 64, 75, 71, 82, 64, 2, 22, 0, 0, 0, 68, 92, 97, 2, 2, 66, 28, 2, + 83, 66, 1, 69, 23, 20, 64, 2, 76, 2, 77, 73, 70, 80, 81, 83, 85, 24, + 65, 3, 72, 69, 85, 76, 78, 4, 66, 65, 73, 15, 68, 0, 73, 82, 70, 74, + 70, 0, 68, 74, 67, 1, 80, 72, 58, 46, 15, 8, 4, 70, 64, 4, 1, 10, + 4, 65, 76, 65, 70, 70, 68, 5, 9, 10, 5, 5, 7, 9, 19, 11, 6, 4, + 3, 2, 9, 11, 83, 64, 5, 71, 8, 10, 17, 13, 2, 19, 12, 14, 67, 4, + 2, 3, 15, 89, 12, 20, 17, 23, 25, 20, 20, 31, 29, 10, 21, 25, 65, 62, + 72, 16, 8, 11, 13, 12, 16, 14, 22, 0, 6, 7, 12, 62, 78, 37, 40, 35, + 32, 31, 28, 25, 24, 21, 12, 9, 7, 2, 65, 86, 75, 75, 89, 7, 4, 2, + 1, 71, 73, 73, 68, 79, 76, 81, 90, 82, 100, 76, 16, 13, 10, 5, 4, 65, + 69, 71, 73, 68, 29, 18, 
13, 6, 10, 64, 67, 71, 78, 5, 35, 27, 22, 20, + 16, 5, 1, 66, 71, 69, 44, 30, 15, 4, 12, 0, 73, 76, 7, 49, 40, 29, + 19, 22, 4, 65, 68, 71, 62, 93, 88, 78, 85, 84, 78, 78, 74, 75, 76, 73, + 70, 79, 78, 82, 84, 70, 79, 76, 75, 70, 68, 70, 71, 68, 70, 69, 70, 79, + 5, 6, 11, 3, 64, 6, 4, 3, 3, 2, 1, 65, 64, 67, 71, 68, 6, 81, + 1, 16, 0, 9, 4, 7, 12, 4, 2, 4, 9, 69, 70, 78, 40, 41, 42, 35, + 31, 37, 34, 34, 33, 28, 32, 29, 21, 16, 4, 27, 28, 24, 17, 18, 17, 9, + 7, 2, 3, 69, 72, 73, 82, 39, 39, 38, 34, 25, 28, 25, 16, 16, 11, 7, + 66, 68, 75, 83, 66, 66, 85, 7, 6, 0, 71, 68, 70, 76, 70, 78, 78, 88, + 86, 92, 100, 71, 81, 86, 77, 71, 2, 70, 10, 19, 67, 67, 7, 11, 4, 10, + 19, 4, 1, 47, 26, 7, 71, 82, 103, 116, 126, 126, 0, 39, 29, 25, 15, 21, + 10, 5, 3, 71, 76, 71, 2, 68, 10, 17, 65, 66, 5, 9, 6, 9, 18, 8, + 2, 47, 26, 7, 71, 82, 103, 116, 126, 126, 75, 68, 1, 6, 6, 68, 12}, + + { + + 46, 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 3, 8, 37, 58, 16, 70, 68, + 2, 6, 3, 64, 65, 70, 76, 11, 65, 94, 102, 111, 10, 76, 69, 2, 6, 3, + 78, 68, 11, 7, 65, 73, 79, 1, 78, 82, 95, 6, 69, 75, 4, 76, 74, 86, + 64, 75, 71, 82, 0, 2, 22, 0, 0, 0, 68, 92, 97, 3, 1, 66, 27, 1, + 83, 0, 2, 68, 25, 22, 0, 4, 75, 3, 75, 72, 68, 80, 80, 82, 85, 24, + 65, 4, 70, 69, 84, 76, 76, 4, 67, 66, 74, 15, 68, 1, 73, 81, 69, 74, + 69, 0, 67, 72, 66, 2, 79, 71, 62, 50, 18, 11, 4, 69, 64, 4, 1, 10, + 5, 65, 76, 0, 70, 72, 70, 8, 9, 11, 5, 5, 7, 9, 20, 12, 7, 3, + 3, 2, 9, 11, 84, 64, 5, 72, 8, 9, 17, 12, 2, 19, 12, 14, 68, 4, + 1, 2, 15, 89, 12, 19, 16, 21, 22, 19, 18, 29, 27, 8, 18, 23, 71, 62, + 76, 14, 6, 8, 10, 9, 13, 9, 18, 66, 4, 3, 10, 62, 79, 35, 38, 33, + 30, 29, 26, 23, 22, 19, 9, 7, 5, 0, 67, 88, 75, 75, 90, 6, 3, 1, + 64, 72, 74, 74, 69, 80, 77, 82, 90, 83, 99, 74, 17, 14, 10, 6, 4, 65, + 69, 70, 71, 66, 29, 19, 14, 6, 11, 0, 66, 70, 76, 5, 36, 28, 23, 20, + 17, 6, 1, 65, 71, 69, 44, 30, 15, 3, 12, 0, 73, 76, 7, 48, 39, 28, + 18, 22, 4, 64, 68, 71, 62, 91, 86, 77, 83, 83, 77, 77, 73, 74, 74, 72, 
+ 68, 78, 77, 82, 83, 68, 79, 76, 75, 70, 68, 70, 72, 68, 70, 69, 70, 80, + 6, 6, 12, 3, 64, 6, 4, 3, 3, 2, 1, 65, 64, 67, 71, 68, 6, 82, + 0, 16, 64, 9, 3, 6, 12, 3, 1, 4, 9, 70, 70, 79, 39, 40, 41, 33, + 29, 35, 32, 31, 30, 25, 29, 26, 18, 14, 2, 22, 24, 20, 12, 15, 14, 7, + 5, 64, 1, 71, 74, 74, 82, 36, 36, 35, 32, 22, 25, 22, 13, 14, 8, 5, + 69, 70, 77, 84, 67, 66, 87, 6, 4, 64, 73, 70, 72, 77, 71, 79, 79, 89, + 88, 93, 100, 72, 83, 88, 76, 71, 3, 69, 11, 20, 67, 66, 7, 11, 4, 10, + 20, 4, 1, 46, 24, 4, 74, 86, 107, 120, 126, 126, 1, 39, 29, 25, 15, 21, + 10, 5, 3, 70, 76, 71, 3, 68, 11, 18, 65, 66, 6, 10, 6, 9, 18, 8, + 2, 46, 24, 4, 74, 86, 107, 120, 126, 126, 75, 68, 1, 6, 6, 66, 14}, + + { + + 45, 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 1, 6, 36, 58, 16, 69, 68, + 3, 6, 3, 65, 64, 70, 77, 9, 67, 98, 105, 112, 12, 75, 69, 3, 6, 3, + 79, 68, 11, 7, 65, 73, 79, 1, 78, 81, 94, 6, 68, 74, 4, 75, 74, 86, + 64, 75, 71, 82, 0, 2, 22, 0, 0, 0, 67, 92, 97, 3, 1, 67, 27, 1, + 83, 2, 3, 67, 26, 25, 2, 5, 73, 5, 74, 71, 66, 80, 80, 82, 85, 24, + 65, 5, 67, 69, 83, 75, 74, 3, 67, 66, 74, 16, 68, 1, 72, 81, 69, 73, + 68, 1, 66, 71, 64, 2, 78, 70, 62, 55, 21, 14, 4, 69, 64, 5, 1, 10, + 5, 64, 75, 2, 69, 74, 72, 11, 9, 11, 5, 5, 8, 10, 22, 13, 7, 3, + 3, 2, 10, 12, 85, 64, 5, 72, 7, 8, 16, 12, 1, 19, 12, 14, 70, 4, + 0, 1, 14, 89, 12, 19, 16, 19, 19, 18, 16, 26, 24, 6, 16, 21, 78, 62, + 79, 12, 5, 6, 6, 7, 10, 5, 13, 69, 2, 0, 8, 62, 81, 33, 36, 31, + 27, 26, 24, 20, 20, 17, 7, 4, 3, 65, 68, 89, 75, 75, 90, 5, 2, 0, + 65, 73, 75, 75, 71, 81, 77, 83, 91, 83, 99, 73, 18, 14, 10, 6, 5, 64, + 68, 70, 69, 65, 30, 19, 14, 7, 12, 1, 64, 68, 74, 6, 36, 28, 23, 21, + 18, 6, 1, 65, 70, 69, 45, 30, 14, 3, 12, 0, 73, 75, 6, 48, 39, 27, + 17, 21, 4, 64, 68, 71, 62, 90, 85, 76, 82, 81, 76, 75, 71, 73, 73, 70, + 66, 78, 76, 81, 82, 66, 78, 76, 75, 69, 67, 70, 72, 68, 71, 69, 70, 80, + 6, 7, 13, 3, 64, 7, 5, 3, 4, 3, 1, 66, 64, 67, 71, 68, 7, 83, + 64, 16, 65, 8, 2, 6, 12, 2, 1, 4, 8, 72, 71, 
80, 37, 39, 40, 32, + 27, 33, 30, 29, 28, 23, 26, 24, 15, 11, 0, 18, 20, 16, 8, 11, 11, 4, + 3, 66, 64, 73, 75, 75, 83, 34, 34, 33, 29, 19, 23, 20, 10, 11, 6, 2, + 72, 72, 78, 86, 68, 67, 88, 4, 3, 66, 75, 71, 74, 79, 72, 81, 80, 90, + 89, 94, 101, 74, 84, 89, 76, 70, 3, 69, 12, 21, 67, 66, 8, 12, 4, 11, + 21, 4, 1, 45, 22, 2, 77, 89, 111, 124, 126, 126, 1, 39, 29, 25, 15, 22, + 11, 5, 3, 70, 75, 70, 3, 67, 12, 19, 64, 65, 6, 10, 6, 9, 19, 8, + 2, 45, 22, 2, 77, 89, 111, 124, 126, 126, 75, 68, 1, 7, 7, 65, 17}, + + { + + 43, 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 0, 4, 35, 58, 15, 67, 68, + 3, 7, 3, 65, 64, 71, 79, 8, 70, 101, 107, 114, 15, 74, 69, 3, 7, 3, + 79, 67, 11, 6, 65, 73, 78, 1, 78, 81, 94, 6, 68, 74, 4, 75, 73, 86, + 64, 75, 71, 82, 0, 2, 22, 0, 0, 0, 67, 93, 97, 4, 0, 67, 26, 0, + 83, 5, 4, 66, 28, 27, 3, 7, 71, 6, 73, 70, 64, 80, 80, 81, 85, 24, + 65, 6, 65, 69, 83, 75, 72, 3, 68, 67, 75, 16, 68, 2, 72, 80, 68, 73, + 67, 1, 66, 69, 0, 3, 77, 69, 62, 59, 24, 17, 4, 69, 64, 5, 1, 10, + 6, 64, 75, 4, 69, 76, 74, 13, 9, 11, 5, 5, 8, 10, 23, 14, 7, 3, + 3, 2, 10, 13, 86, 64, 5, 73, 7, 7, 16, 12, 1, 19, 12, 14, 71, 4, + 64, 0, 14, 89, 11, 18, 15, 17, 15, 17, 14, 24, 22, 4, 13, 19, 84, 62, + 83, 10, 3, 4, 3, 4, 6, 1, 9, 72, 0, 67, 6, 62, 83, 31, 34, 28, + 25, 24, 22, 18, 18, 15, 5, 2, 1, 67, 70, 91, 75, 75, 91, 4, 1, 64, + 66, 74, 77, 76, 73, 82, 78, 84, 92, 84, 98, 71, 19, 15, 10, 6, 6, 64, + 68, 69, 67, 64, 30, 19, 15, 8, 13, 2, 0, 67, 73, 7, 37, 29, 24, 21, + 19, 7, 1, 65, 70, 69, 45, 30, 13, 3, 12, 0, 73, 75, 6, 47, 38, 26, + 16, 21, 4, 64, 68, 71, 62, 89, 83, 75, 80, 80, 75, 73, 69, 72, 71, 68, + 64, 77, 75, 81, 81, 65, 78, 76, 75, 69, 67, 70, 72, 68, 71, 69, 70, 81, + 7, 7, 14, 3, 64, 7, 5, 3, 4, 3, 1, 66, 64, 67, 71, 68, 7, 84, + 65, 16, 66, 7, 1, 5, 12, 1, 0, 4, 8, 73, 71, 81, 36, 38, 39, 30, + 25, 31, 28, 27, 25, 20, 23, 21, 12, 9, 65, 14, 16, 12, 3, 8, 7, 2, + 0, 69, 66, 75, 77, 76, 84, 32, 31, 30, 27, 16, 20, 17, 7, 9, 3, 0, + 75, 74, 80, 87, 69, 68, 
90, 3, 1, 68, 77, 73, 76, 81, 73, 82, 81, 91, + 90, 95, 102, 75, 85, 90, 76, 70, 4, 68, 13, 22, 67, 65, 8, 12, 4, 11, + 22, 4, 1, 44, 20, 64, 80, 92, 115, 126, 126, 126, 1, 39, 29, 25, 15, 22, + 11, 5, 3, 69, 75, 70, 4, 67, 12, 20, 64, 65, 6, 10, 6, 9, 19, 8, + 2, 44, 20, 64, 80, 92, 115, 126, 126, 126, 75, 68, 1, 7, 7, 0, 19}, + + { + + 42, 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 64, 2, 34, 58, 15, 65, 68, + 3, 8, 2, 66, 64, 72, 81, 7, 72, 105, 110, 116, 18, 73, 68, 3, 8, 2, + 79, 66, 11, 6, 65, 72, 77, 1, 78, 81, 94, 6, 68, 74, 4, 75, 73, 85, + 64, 75, 71, 82, 1, 2, 22, 0, 0, 0, 66, 93, 97, 4, 64, 67, 25, 0, + 83, 7, 5, 65, 30, 29, 5, 8, 70, 8, 71, 69, 2, 80, 79, 81, 84, 24, + 65, 7, 0, 69, 82, 74, 70, 3, 68, 67, 75, 16, 68, 2, 71, 79, 67, 72, + 66, 2, 65, 68, 2, 4, 75, 68, 62, 62, 27, 20, 5, 68, 0, 6, 1, 10, + 6, 64, 74, 6, 69, 78, 76, 16, 9, 12, 5, 5, 8, 11, 25, 15, 8, 2, + 3, 2, 11, 13, 87, 0, 5, 73, 6, 6, 15, 11, 0, 19, 12, 14, 72, 5, + 65, 0, 13, 89, 11, 17, 14, 15, 12, 16, 13, 22, 20, 2, 11, 17, 90, 62, + 86, 8, 1, 1, 0, 2, 3, 67, 5, 74, 65, 70, 4, 62, 84, 29, 32, 26, + 23, 22, 20, 16, 16, 13, 2, 0, 64, 68, 72, 93, 75, 75, 91, 3, 0, 65, + 68, 75, 78, 77, 74, 83, 78, 85, 92, 85, 97, 70, 20, 15, 10, 7, 6, 0, + 67, 68, 65, 1, 30, 20, 15, 8, 14, 3, 2, 65, 71, 7, 38, 30, 25, 22, + 20, 7, 2, 64, 69, 69, 46, 30, 13, 2, 12, 0, 73, 74, 6, 47, 37, 25, + 15, 21, 4, 0, 68, 70, 62, 87, 82, 73, 78, 78, 74, 72, 68, 71, 69, 67, + 1, 76, 74, 80, 80, 0, 78, 76, 74, 68, 66, 70, 73, 68, 72, 69, 70, 81, + 7, 7, 15, 3, 64, 7, 5, 3, 4, 3, 1, 67, 64, 67, 71, 68, 8, 85, + 66, 16, 67, 7, 1, 4, 12, 1, 64, 4, 8, 74, 71, 82, 35, 37, 38, 28, + 23, 29, 26, 24, 23, 17, 20, 18, 9, 6, 67, 9, 12, 8, 65, 5, 4, 64, + 65, 71, 68, 76, 78, 76, 84, 29, 29, 28, 25, 13, 17, 15, 4, 7, 1, 65, + 78, 76, 82, 89, 69, 68, 92, 2, 0, 69, 79, 75, 78, 82, 74, 84, 82, 91, + 92, 96, 102, 77, 87, 92, 75, 70, 5, 68, 14, 23, 66, 64, 9, 13, 4, 12, + 23, 4, 1, 43, 18, 67, 83, 96, 119, 126, 126, 126, 2, 39, 29, 25, 15, 
23, + 11, 6, 4, 69, 75, 70, 4, 67, 13, 21, 64, 65, 7, 11, 6, 10, 20, 8, + 2, 43, 18, 67, 83, 96, 119, 126, 126, 126, 75, 68, 1, 7, 7, 1, 21}, + + { + + 41, 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 66, 0, 33, 58, 15, 0, 68, + 4, 9, 2, 66, 64, 72, 82, 6, 75, 108, 112, 117, 21, 72, 68, 4, 9, 2, + 80, 65, 11, 5, 65, 72, 77, 1, 78, 81, 94, 6, 68, 73, 4, 75, 72, 85, + 64, 75, 71, 82, 1, 2, 22, 0, 0, 0, 66, 93, 97, 5, 65, 67, 24, 64, + 83, 10, 6, 64, 31, 31, 6, 10, 68, 9, 70, 68, 4, 80, 79, 80, 84, 24, + 65, 8, 2, 69, 81, 74, 68, 3, 69, 68, 76, 16, 68, 3, 71, 78, 66, 72, + 65, 2, 64, 66, 3, 4, 74, 67, 62, 62, 30, 23, 5, 68, 0, 6, 1, 10, + 7, 0, 74, 8, 69, 80, 78, 19, 9, 12, 5, 5, 8, 11, 26, 16, 8, 2, + 3, 2, 11, 14, 88, 0, 5, 74, 6, 5, 15, 11, 0, 19, 12, 14, 73, 5, + 66, 64, 13, 89, 11, 17, 14, 13, 9, 15, 11, 20, 17, 0, 8, 15, 97, 62, + 90, 6, 64, 64, 66, 64, 0, 71, 1, 77, 67, 74, 2, 62, 86, 27, 30, 24, + 20, 19, 18, 14, 14, 11, 0, 65, 66, 70, 73, 95, 75, 75, 92, 2, 64, 66, + 69, 76, 79, 78, 76, 84, 79, 86, 93, 85, 97, 68, 21, 16, 10, 7, 7, 0, + 67, 68, 0, 2, 30, 20, 16, 9, 15, 4, 3, 64, 69, 8, 38, 30, 25, 22, + 21, 8, 2, 64, 69, 69, 46, 30, 12, 2, 12, 0, 73, 74, 6, 46, 37, 24, + 14, 21, 4, 0, 68, 70, 62, 86, 80, 72, 77, 77, 73, 70, 66, 70, 68, 65, + 3, 76, 73, 80, 79, 2, 77, 76, 74, 68, 66, 70, 73, 68, 72, 69, 70, 82, + 8, 8, 16, 3, 64, 8, 5, 3, 4, 3, 1, 67, 64, 67, 71, 68, 8, 86, + 67, 16, 68, 6, 0, 4, 12, 0, 65, 4, 8, 75, 71, 83, 34, 36, 37, 27, + 21, 27, 24, 22, 20, 15, 17, 15, 6, 4, 69, 5, 8, 4, 70, 1, 1, 66, + 67, 74, 70, 78, 80, 77, 85, 27, 26, 25, 22, 10, 15, 12, 1, 4, 65, 68, + 81, 78, 84, 90, 70, 69, 93, 0, 65, 71, 81, 76, 80, 84, 75, 85, 83, 92, + 93, 97, 103, 78, 88, 93, 75, 69, 5, 67, 15, 24, 66, 64, 9, 13, 4, 12, + 24, 4, 1, 42, 16, 69, 86, 99, 123, 126, 126, 126, 2, 39, 29, 25, 15, 23, + 12, 6, 4, 68, 75, 70, 5, 66, 14, 22, 64, 64, 7, 11, 6, 10, 20, 8, + 2, 42, 16, 69, 86, 99, 123, 126, 126, 126, 75, 68, 1, 8, 8, 3, 23}, + + { + + 40, 6, 79, 40, 6, 79, 72, 4, 
22, 11, 64, 67, 65, 32, 58, 15, 2, 68, + 4, 10, 2, 67, 64, 73, 84, 5, 77, 112, 115, 119, 24, 71, 68, 4, 10, 2, + 80, 64, 11, 5, 65, 72, 76, 1, 78, 81, 94, 6, 68, 73, 4, 75, 72, 85, + 64, 75, 71, 82, 1, 2, 22, 0, 0, 0, 65, 93, 97, 5, 66, 67, 23, 64, + 83, 12, 7, 0, 33, 33, 8, 11, 66, 11, 69, 67, 6, 80, 79, 80, 84, 24, + 65, 9, 4, 69, 80, 73, 66, 3, 69, 68, 76, 16, 68, 3, 70, 77, 65, 71, + 64, 3, 0, 65, 5, 5, 73, 66, 62, 62, 33, 26, 5, 68, 0, 7, 1, 10, + 7, 0, 73, 10, 69, 82, 80, 22, 9, 12, 5, 5, 8, 12, 28, 17, 8, 2, + 3, 2, 12, 15, 89, 0, 5, 74, 5, 4, 14, 11, 64, 19, 12, 14, 74, 5, + 67, 65, 12, 89, 11, 16, 13, 11, 6, 14, 9, 18, 15, 65, 6, 13, 103, 62, + 93, 4, 66, 66, 69, 66, 66, 75, 66, 80, 69, 77, 0, 62, 88, 25, 28, 22, + 18, 17, 16, 12, 12, 9, 65, 67, 68, 72, 75, 97, 75, 75, 92, 1, 65, 67, + 70, 77, 80, 79, 78, 85, 79, 87, 94, 86, 96, 67, 22, 16, 10, 7, 8, 1, + 66, 67, 2, 3, 30, 20, 16, 10, 16, 5, 5, 1, 67, 9, 39, 31, 26, 23, + 22, 8, 2, 64, 68, 69, 47, 30, 11, 2, 12, 0, 73, 73, 6, 46, 36, 23, + 13, 21, 4, 0, 68, 70, 62, 85, 79, 71, 75, 75, 72, 68, 64, 69, 66, 0, + 5, 75, 72, 79, 78, 4, 77, 76, 74, 67, 65, 70, 73, 68, 73, 69, 70, 82, + 8, 8, 17, 3, 64, 8, 5, 3, 4, 3, 1, 68, 64, 67, 71, 68, 9, 87, + 68, 16, 69, 5, 64, 3, 12, 64, 66, 4, 8, 76, 71, 84, 33, 35, 36, 25, + 19, 25, 22, 20, 18, 12, 14, 12, 3, 1, 71, 1, 4, 0, 75, 65, 65, 69, + 69, 76, 72, 80, 81, 78, 86, 25, 24, 23, 20, 7, 12, 10, 65, 2, 67, 70, + 84, 80, 86, 92, 71, 70, 95, 64, 66, 73, 83, 78, 82, 86, 76, 87, 84, 93, + 94, 98, 104, 80, 89, 94, 75, 69, 6, 67, 16, 25, 66, 0, 10, 14, 4, 13, + 25, 4, 1, 41, 14, 72, 89, 102, 126, 126, 126, 126, 2, 39, 29, 25, 15, 24, + 12, 6, 4, 68, 75, 70, 5, 66, 15, 23, 64, 64, 7, 11, 6, 10, 21, 8, + 2, 41, 14, 72, 89, 102, 126, 126, 126, 126, 75, 68, 1, 8, 8, 4, 25}, + + { + + 38, 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 69, 68, 31, 58, 14, 3, 69, + 4, 10, 1, 68, 64, 74, 86, 3, 80, 116, 118, 121, 26, 71, 68, 4, 10, 1, + 81, 64, 11, 4, 65, 72, 76, 0, 79, 81, 94, 5, 68, 73, 4, 
75, 72, 85, + 64, 75, 72, 82, 1, 2, 22, 0, 0, 0, 65, 94, 97, 5, 67, 68, 22, 65, + 84, 14, 8, 1, 34, 35, 9, 12, 65, 12, 68, 66, 8, 80, 79, 80, 84, 24, + 65, 9, 6, 69, 80, 73, 65, 2, 70, 69, 77, 16, 68, 3, 70, 77, 65, 71, + 64, 3, 0, 64, 6, 5, 72, 65, 62, 62, 36, 28, 5, 68, 0, 7, 0, 10, + 7, 0, 73, 12, 69, 84, 82, 24, 9, 12, 5, 5, 8, 12, 29, 17, 8, 1, + 2, 2, 12, 15, 91, 0, 5, 75, 4, 2, 13, 10, 65, 19, 12, 13, 76, 5, + 68, 66, 11, 89, 10, 15, 12, 8, 2, 12, 7, 15, 12, 68, 3, 11, 110, 62, + 97, 1, 68, 69, 73, 69, 70, 80, 71, 83, 71, 81, 65, 62, 90, 22, 25, 19, + 15, 14, 13, 9, 9, 7, 68, 70, 70, 74, 77, 99, 75, 76, 93, 0, 67, 69, + 72, 79, 82, 81, 80, 86, 80, 88, 95, 87, 96, 66, 22, 16, 10, 7, 8, 1, + 66, 67, 4, 4, 30, 20, 16, 10, 16, 6, 6, 2, 66, 9, 39, 31, 26, 23, + 23, 8, 2, 64, 68, 70, 47, 29, 10, 1, 12, 0, 73, 73, 5, 45, 35, 21, + 12, 20, 4, 0, 68, 70, 62, 84, 78, 70, 74, 74, 71, 67, 0, 68, 65, 1, + 7, 75, 72, 79, 77, 5, 77, 76, 74, 67, 65, 70, 74, 69, 74, 69, 71, 83, + 8, 8, 18, 3, 65, 8, 5, 2, 4, 3, 1, 69, 64, 67, 71, 68, 9, 88, + 69, 16, 71, 4, 65, 2, 11, 65, 67, 4, 7, 78, 72, 86, 31, 33, 35, 23, + 17, 22, 19, 17, 15, 9, 11, 9, 64, 65, 73, 67, 0, 68, 80, 69, 69, 72, + 72, 79, 74, 82, 83, 79, 87, 22, 21, 20, 17, 4, 9, 7, 69, 64, 70, 73, + 87, 82, 88, 94, 72, 71, 97, 66, 68, 75, 86, 80, 84, 88, 78, 89, 86, 94, + 96, 99, 105, 82, 91, 96, 75, 69, 6, 67, 17, 26, 66, 0, 10, 14, 4, 13, + 25, 4, 0, 39, 12, 75, 93, 106, 126, 126, 126, 126, 2, 39, 29, 25, 15, 24, + 12, 6, 4, 68, 75, 70, 5, 66, 15, 23, 64, 64, 7, 11, 6, 10, 21, 7, + 1, 39, 12, 75, 93, 106, 126, 126, 126, 126, 75, 69, 0, 8, 8, 5, 27}, + + { + + 37, 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 70, 70, 30, 59, 14, 5, + 69, 5, 11, 1, 68, 0, 74, 87, 2, 82, 119, 120, 122, 29, 70, 67, 5, + 11, 1, 81, 0, 11, 4, 64, 71, 75, 0, 79, 80, 93, 5, 67, 72, 4, + 74, 71, 84, 0, 74, 72, 81, 2, 2, 22, 0, 0, 0, 64, 94, 97, 6, + 67, 68, 22, 65, 84, 17, 10, 3, 36, 38, 11, 14, 0, 14, 66, 64, 11, + 80, 78, 79, 83, 24, 65, 10, 9, 68, 79, 
72, 0, 2, 70, 69, 77, 17, + 68, 4, 69, 76, 64, 70, 0, 4, 1, 1, 8, 6, 70, 0, 62, 62, 40, + 31, 6, 67, 1, 8, 0, 11, 8, 1, 72, 15, 68, 86, 83, 27, 9, 13, + 5, 6, 9, 13, 31, 18, 9, 1, 2, 2, 13, 16, 92, 1, 5, 75, 4, + 1, 13, 10, 65, 19, 12, 13, 77, 6, 68, 66, 11, 89, 10, 15, 12, 6, + 64, 11, 6, 13, 10, 70, 1, 9, 116, 62, 100, 64, 69, 71, 76, 71, 73, + 84, 75, 85, 73, 84, 67, 62, 91, 20, 23, 17, 13, 12, 11, 7, 7, 5, + 70, 72, 71, 75, 78, 100, 75, 76, 93, 0, 68, 70, 73, 80, 83, 82, 81, + 87, 80, 89, 95, 87, 95, 64, 23, 17, 10, 8, 9, 2, 65, 66, 7, 6, + 31, 21, 17, 11, 17, 8, 8, 4, 64, 10, 40, 32, 27, 24, 24, 9, 3, + 0, 67, 70, 48, 29, 10, 1, 13, 1, 73, 72, 5, 45, 35, 20, 11, 20, + 5, 1, 67, 69, 62, 82, 76, 68, 72, 72, 69, 65, 2, 66, 0, 3, 10, + 74, 71, 78, 75, 7, 76, 75, 73, 66, 64, 69, 74, 69, 74, 69, 71, 83, + 9, 9, 19, 4, 65, 9, 6, 2, 5, 4, 1, 69, 0, 66, 70, 67, 10, + 88, 70, 16, 72, 4, 65, 2, 11, 65, 67, 4, 7, 79, 72, 87, 30, 32, + 35, 22, 16, 20, 17, 15, 13, 7, 9, 7, 67, 67, 75, 71, 66, 72, 84, + 72, 72, 74, 74, 81, 75, 83, 84, 79, 87, 20, 19, 18, 15, 2, 7, 5, + 72, 66, 72, 75, 89, 83, 89, 95, 72, 71, 98, 67, 69, 76, 88, 81, 85, + 89, 79, 90, 87, 94, 97, 99, 105, 83, 92, 97, 74, 68, 7, 66, 19, 28, + 65, 1, 11, 15, 5, 14, 26, 4, 0, 38, 10, 77, 96, 109, 126, 126, 126, + 126, 3, 39, 30, 26, 16, 25, 13, 7, 5, 67, 74, 69, 6, 65, 16, 24, + 0, 0, 8, 12, 6, 11, 22, 7, 1, 38, 10, 77, 96, 109, 126, 126, 126, + 126, 75, 69, 0, 9, 9, 7, 30}, + + { + + 36, 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 71, 72, 29, 59, 14, 7, + 69, 5, 12, 1, 69, 0, 75, 89, 1, 85, 123, 123, 124, 32, 69, 67, 5, + 12, 1, 81, 1, 11, 3, 64, 71, 74, 0, 79, 80, 93, 5, 67, 72, 4, + 74, 71, 84, 0, 74, 72, 81, 2, 2, 22, 0, 0, 0, 0, 94, 97, 6, + 68, 68, 21, 65, 84, 19, 11, 4, 38, 40, 12, 15, 2, 15, 65, 0, 13, + 80, 78, 79, 83, 24, 65, 11, 11, 68, 78, 71, 2, 2, 70, 69, 78, 17, + 68, 4, 68, 75, 0, 69, 1, 4, 2, 2, 9, 7, 69, 1, 62, 62, 43, + 34, 6, 67, 1, 8, 0, 11, 8, 1, 72, 17, 68, 88, 85, 30, 9, 13, + 5, 6, 9, 13, 33, 
19, 9, 1, 2, 2, 14, 17, 93, 1, 5, 75, 3, + 0, 12, 10, 66, 19, 12, 13, 78, 6, 69, 67, 10, 89, 10, 14, 11, 4, + 67, 10, 4, 11, 8, 72, 64, 7, 122, 62, 104, 66, 71, 73, 79, 73, 76, + 88, 79, 88, 75, 87, 69, 62, 93, 18, 21, 15, 11, 10, 9, 5, 5, 3, + 72, 74, 73, 77, 80, 102, 75, 76, 94, 64, 69, 71, 74, 81, 84, 83, 83, + 88, 80, 90, 96, 88, 94, 0, 24, 17, 10, 8, 10, 3, 64, 65, 9, 7, + 31, 21, 17, 12, 18, 9, 10, 6, 1, 11, 41, 33, 28, 25, 25, 9, 3, + 0, 66, 70, 49, 29, 9, 1, 13, 1, 73, 72, 5, 45, 34, 19, 10, 20, + 5, 1, 67, 69, 62, 81, 75, 67, 70, 70, 68, 0, 4, 65, 2, 5, 12, + 73, 70, 78, 74, 9, 76, 75, 73, 65, 64, 69, 74, 69, 75, 69, 71, 83, + 9, 9, 20, 4, 65, 9, 6, 2, 5, 4, 1, 70, 0, 66, 70, 67, 11, + 89, 71, 16, 73, 3, 66, 1, 11, 66, 68, 4, 7, 80, 72, 88, 29, 31, + 34, 20, 14, 18, 15, 13, 11, 4, 6, 4, 70, 70, 77, 75, 70, 76, 89, + 75, 75, 77, 76, 84, 77, 85, 85, 80, 88, 18, 17, 15, 13, 64, 4, 2, + 75, 68, 75, 77, 92, 85, 91, 97, 73, 72, 100, 68, 70, 78, 90, 83, 87, + 91, 80, 92, 88, 95, 98, 100, 106, 85, 93, 98, 74, 68, 8, 66, 20, 29, + 65, 2, 12, 16, 5, 14, 27, 4, 0, 37, 8, 80, 99, 112, 126, 126, 126, + 126, 3, 39, 30, 26, 16, 26, 13, 7, 5, 67, 74, 69, 6, 65, 17, 25, + 0, 0, 8, 12, 6, 11, 23, 7, 1, 37, 8, 80, 99, 112, 126, 126, 126, + 126, 75, 69, 0, 9, 9, 8, 32}, + + { + + 35, 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 73, 74, 28, 59, 14, 9, + 69, 6, 13, 1, 69, 0, 75, 90, 0, 87, 126, 125, 125, 35, 68, 67, 6, + 13, 1, 82, 2, 11, 3, 64, 71, 74, 0, 79, 80, 93, 5, 67, 71, 4, + 74, 70, 84, 0, 74, 72, 81, 2, 2, 22, 0, 0, 0, 0, 94, 97, 7, + 69, 68, 20, 66, 84, 22, 12, 5, 39, 42, 14, 17, 4, 17, 64, 1, 15, + 80, 78, 78, 83, 24, 65, 12, 13, 68, 77, 71, 4, 2, 71, 70, 78, 17, + 68, 5, 68, 74, 1, 69, 2, 5, 3, 4, 11, 7, 68, 2, 62, 62, 46, + 37, 6, 67, 1, 9, 0, 11, 9, 2, 71, 19, 68, 90, 87, 33, 9, 13, + 5, 6, 9, 14, 34, 20, 9, 1, 2, 2, 14, 18, 94, 1, 5, 76, 3, + 64, 12, 10, 66, 19, 12, 13, 79, 6, 70, 68, 10, 89, 10, 14, 11, 2, + 70, 9, 2, 9, 5, 74, 67, 5, 126, 62, 107, 68, 73, 75, 82, 76, 
79, + 92, 83, 91, 77, 91, 71, 62, 95, 16, 19, 13, 8, 7, 7, 3, 3, 1, + 74, 76, 75, 79, 81, 104, 75, 76, 94, 65, 70, 72, 75, 82, 85, 84, 85, + 89, 81, 91, 97, 88, 94, 2, 25, 18, 10, 8, 11, 3, 64, 65, 11, 8, + 31, 21, 18, 13, 19, 10, 11, 7, 3, 12, 41, 33, 28, 25, 26, 10, 3, + 0, 66, 70, 49, 29, 8, 1, 13, 1, 73, 71, 5, 44, 34, 18, 9, 20, + 5, 1, 67, 69, 62, 80, 73, 66, 69, 69, 67, 2, 6, 64, 3, 7, 14, + 73, 69, 77, 73, 11, 75, 75, 73, 65, 0, 69, 74, 69, 75, 69, 71, 84, + 10, 10, 21, 4, 65, 10, 6, 2, 5, 4, 1, 70, 0, 66, 70, 67, 11, + 90, 72, 16, 74, 2, 67, 1, 11, 67, 69, 4, 7, 81, 72, 89, 28, 30, + 33, 19, 12, 16, 13, 11, 8, 2, 3, 1, 73, 72, 79, 79, 74, 80, 94, + 79, 78, 79, 78, 86, 79, 87, 87, 81, 89, 16, 14, 13, 10, 67, 2, 0, + 78, 71, 77, 80, 95, 87, 93, 98, 74, 73, 101, 70, 72, 80, 92, 84, 89, + 93, 81, 93, 89, 96, 99, 101, 107, 86, 94, 99, 74, 67, 8, 65, 21, 30, + 65, 2, 12, 16, 5, 15, 28, 4, 0, 36, 6, 82, 102, 115, 126, 126, 126, + 126, 3, 39, 30, 26, 16, 26, 14, 7, 5, 66, 74, 69, 7, 64, 18, 26, + 0, 1, 8, 12, 6, 11, 23, 7, 1, 36, 6, 82, 102, 115, 126, 126, 126, + 126, 75, 69, 0, 10, 10, 10, 34}, + + { + + 33, 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 74, 76, 27, 59, 13, 11, + 69, 6, 14, 0, 70, 0, 76, 92, 64, 90, 126, 126, 126, 38, 67, 67, 6, + 14, 0, 82, 3, 11, 2, 64, 70, 73, 0, 79, 80, 93, 5, 67, 71, 4, + 74, 70, 83, 0, 74, 72, 81, 3, 2, 22, 0, 0, 0, 1, 95, 97, 7, + 70, 68, 19, 66, 84, 24, 13, 6, 41, 44, 15, 18, 5, 18, 1, 2, 17, + 80, 77, 78, 83, 24, 65, 13, 15, 68, 77, 70, 6, 2, 71, 70, 79, 17, + 68, 5, 67, 73, 2, 68, 3, 5, 3, 5, 12, 8, 67, 3, 62, 62, 49, + 40, 6, 66, 1, 9, 0, 11, 9, 2, 71, 21, 68, 92, 89, 35, 9, 14, + 5, 6, 9, 14, 36, 21, 10, 0, 2, 2, 15, 18, 95, 1, 5, 76, 2, + 65, 11, 9, 67, 19, 12, 13, 80, 6, 71, 69, 9, 89, 9, 13, 10, 0, + 74, 8, 0, 7, 3, 76, 69, 3, 126, 62, 111, 70, 75, 78, 85, 78, 83, + 97, 87, 94, 79, 94, 73, 62, 96, 14, 17, 10, 6, 5, 5, 1, 1, 64, + 77, 78, 77, 81, 83, 106, 75, 76, 95, 66, 71, 73, 77, 83, 87, 85, 86, + 90, 81, 92, 97, 89, 93, 
3, 26, 18, 10, 9, 11, 4, 0, 64, 13, 10, + 31, 22, 18, 13, 20, 11, 13, 9, 4, 12, 42, 34, 29, 26, 27, 10, 3, + 1, 65, 70, 50, 29, 8, 0, 13, 1, 73, 71, 5, 44, 33, 17, 8, 20, + 5, 2, 67, 69, 62, 78, 72, 65, 67, 67, 66, 3, 7, 0, 5, 8, 16, + 72, 68, 77, 72, 12, 75, 75, 73, 64, 0, 69, 75, 69, 76, 69, 71, 84, + 10, 10, 22, 4, 65, 10, 6, 2, 5, 4, 1, 71, 0, 66, 70, 67, 12, + 91, 73, 16, 75, 2, 68, 0, 11, 68, 70, 4, 7, 82, 72, 90, 27, 29, + 32, 17, 10, 14, 11, 8, 6, 64, 0, 65, 76, 75, 81, 84, 78, 84, 99, + 82, 82, 82, 81, 89, 81, 89, 88, 82, 89, 13, 12, 10, 8, 70, 64, 66, + 81, 73, 80, 82, 98, 89, 95, 100, 75, 73, 103, 71, 73, 81, 94, 86, 91, + 94, 82, 95, 90, 97, 101, 102, 107, 88, 96, 101, 73, 67, 9, 65, 22, 31, + 65, 3, 13, 17, 5, 15, 29, 4, 0, 35, 4, 85, 105, 119, 126, 126, 126, + 126, 4, 39, 30, 26, 16, 27, 14, 7, 5, 66, 74, 69, 7, 64, 18, 27, + 0, 1, 9, 13, 6, 11, 24, 7, 1, 35, 4, 85, 105, 119, 126, 126, 126, + 126, 75, 69, 0, 10, 10, 11, 36}, + + { + + 32, 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 75, 78, 26, 59, 13, 13, + 69, 6, 15, 0, 70, 0, 77, 94, 65, 92, 126, 126, 126, 41, 66, 66, 6, + 15, 0, 82, 4, 11, 2, 64, 70, 72, 0, 79, 80, 93, 5, 67, 71, 4, + 74, 69, 83, 0, 74, 72, 81, 3, 2, 22, 0, 0, 0, 1, 95, 97, 8, + 71, 68, 18, 67, 84, 27, 14, 7, 43, 46, 17, 20, 7, 20, 2, 3, 20, + 80, 77, 77, 82, 24, 65, 14, 17, 68, 76, 70, 8, 2, 72, 71, 79, 17, + 68, 6, 67, 72, 3, 68, 4, 6, 4, 7, 14, 9, 65, 4, 62, 62, 52, + 43, 7, 66, 2, 10, 0, 11, 10, 2, 70, 23, 68, 94, 91, 38, 9, 14, + 5, 6, 9, 15, 37, 22, 10, 0, 2, 2, 15, 19, 96, 2, 5, 77, 2, + 66, 11, 9, 67, 19, 12, 13, 81, 7, 72, 69, 9, 89, 9, 12, 9, 65, + 77, 7, 64, 5, 1, 78, 72, 1, 126, 62, 114, 72, 77, 80, 88, 81, 86, + 101, 91, 96, 81, 98, 75, 62, 98, 12, 15, 8, 4, 3, 3, 64, 64, 66, + 79, 80, 79, 82, 85, 108, 75, 76, 95, 67, 72, 74, 78, 84, 88, 86, 88, + 91, 82, 93, 98, 90, 92, 5, 27, 19, 10, 9, 12, 4, 0, 0, 15, 11, + 31, 22, 19, 14, 21, 12, 14, 10, 6, 13, 43, 35, 30, 26, 28, 11, 4, + 1, 65, 70, 50, 29, 7, 0, 13, 1, 73, 70, 5, 43, 
32, 16, 7, 20, + 5, 2, 67, 68, 62, 77, 70, 0, 65, 66, 65, 5, 9, 1, 7, 10, 18, + 71, 67, 76, 71, 14, 75, 75, 72, 64, 1, 69, 75, 69, 76, 69, 71, 85, + 11, 10, 23, 4, 65, 10, 6, 2, 5, 4, 1, 71, 0, 66, 70, 67, 12, + 92, 74, 16, 76, 1, 68, 64, 11, 68, 71, 4, 7, 83, 72, 91, 26, 28, + 31, 15, 8, 12, 9, 6, 3, 67, 66, 68, 79, 77, 83, 88, 82, 88, 104, + 85, 85, 84, 83, 91, 83, 90, 90, 82, 90, 11, 9, 8, 6, 73, 67, 68, + 84, 75, 82, 84, 101, 91, 97, 101, 75, 74, 105, 72, 75, 83, 96, 88, 93, + 96, 83, 96, 91, 97, 102, 103, 108, 89, 97, 102, 73, 67, 10, 64, 23, 32, + 64, 4, 13, 17, 5, 16, 30, 4, 0, 34, 2, 88, 108, 122, 126, 126, 126, + 126, 4, 39, 30, 26, 16, 27, 14, 8, 6, 65, 74, 69, 8, 64, 19, 28, + 0, 1, 9, 13, 6, 12, 24, 7, 1, 34, 2, 88, 108, 122, 126, 126, 126, + 126, 75, 69, 0, 10, 10, 13, 38}, + + { + + 31, 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 77, 80, 25, 59, 13, 14, + 69, 7, 15, 0, 71, 1, 77, 95, 67, 95, 126, 126, 126, 43, 65, 66, 7, + 15, 0, 83, 4, 11, 1, 64, 70, 72, 0, 79, 79, 92, 5, 66, 70, 4, + 73, 69, 83, 0, 74, 72, 81, 3, 2, 22, 0, 0, 0, 2, 95, 97, 8, + 71, 69, 18, 67, 84, 29, 15, 8, 44, 49, 18, 21, 9, 21, 3, 4, 22, + 80, 77, 77, 82, 24, 65, 15, 20, 68, 75, 69, 10, 1, 72, 71, 80, 18, + 68, 6, 66, 72, 3, 67, 5, 6, 5, 8, 15, 9, 64, 5, 62, 62, 55, + 46, 7, 66, 2, 10, 0, 11, 10, 3, 70, 25, 67, 96, 93, 41, 9, 14, + 5, 6, 10, 15, 39, 23, 10, 0, 2, 2, 16, 20, 97, 2, 5, 77, 1, + 67, 10, 9, 68, 19, 12, 13, 83, 7, 73, 70, 8, 89, 9, 12, 9, 67, + 80, 6, 66, 2, 65, 80, 74, 64, 126, 62, 118, 74, 78, 82, 92, 83, 89, + 105, 96, 99, 83, 101, 77, 62, 100, 10, 13, 6, 1, 0, 1, 67, 66, 68, + 81, 83, 81, 84, 86, 109, 75, 76, 96, 68, 73, 75, 79, 85, 89, 87, 90, + 92, 82, 94, 99, 90, 92, 6, 28, 19, 10, 9, 13, 5, 1, 0, 17, 12, + 32, 22, 19, 15, 22, 13, 16, 12, 8, 14, 43, 35, 30, 27, 29, 11, 4, + 1, 64, 70, 51, 29, 6, 0, 13, 1, 73, 70, 4, 43, 32, 15, 6, 19, + 5, 2, 67, 68, 62, 76, 69, 1, 64, 64, 64, 7, 11, 2, 8, 12, 20, + 71, 66, 76, 70, 16, 74, 75, 72, 0, 1, 69, 75, 69, 77, 69, 71, 85, + 
11, 11, 24, 4, 65, 11, 7, 2, 6, 5, 1, 72, 0, 66, 70, 67, 13, + 93, 75, 16, 77, 0, 69, 64, 11, 69, 71, 4, 6, 85, 73, 92, 24, 27, + 30, 14, 6, 10, 7, 4, 1, 69, 69, 70, 82, 80, 85, 92, 86, 92, 108, + 89, 88, 87, 85, 94, 85, 92, 91, 83, 91, 9, 7, 5, 3, 76, 69, 71, + 87, 78, 85, 87, 104, 93, 98, 103, 76, 75, 106, 74, 76, 85, 98, 89, 95, + 98, 84, 98, 92, 98, 103, 104, 109, 91, 98, 103, 73, 66, 10, 64, 24, 33, + 64, 4, 14, 18, 5, 16, 31, 4, 0, 33, 0, 90, 111, 125, 126, 126, 126, + 126, 4, 39, 30, 26, 16, 28, 15, 8, 6, 65, 73, 68, 8, 0, 20, 29, + 1, 2, 9, 13, 6, 12, 25, 7, 1, 33, 0, 90, 111, 125, 126, 126, 126, + 126, 75, 69, 0, 11, 11, 14, 41}, + + { + + 30, 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 78, 82, 24, 59, 13, + 16, 69, 7, 16, 64, 71, 1, 78, 97, 68, 97, 126, 126, 126, 46, 64, + 66, 7, 16, 64, 83, 5, 11, 1, 64, 69, 71, 0, 79, 79, 92, 5, + 66, 70, 4, 73, 68, 82, 0, 74, 72, 81, 4, 2, 22, 0, 0, 0, + 2, 95, 97, 9, 72, 69, 17, 68, 84, 32, 16, 9, 46, 51, 20, 23, + 10, 23, 5, 5, 24, 80, 76, 76, 82, 24, 65, 16, 22, 68, 74, 69, + 12, 1, 73, 72, 80, 18, 68, 7, 66, 71, 4, 67, 6, 7, 6, 10, + 17, 10, 0, 6, 62, 62, 58, 49, 7, 65, 2, 11, 0, 11, 11, 3, + 69, 27, 67, 98, 95, 44, 9, 15, 5, 6, 10, 16, 40, 24, 11, 64, + 2, 2, 16, 20, 98, 2, 5, 78, 1, 68, 10, 8, 68, 19, 12, 13, + 84, 7, 74, 71, 8, 89, 9, 11, 8, 69, 83, 5, 68, 0, 67, 82, + 77, 66, 126, 62, 121, 76, 80, 85, 95, 86, 92, 110, 100, 102, 85, 105, + 79, 62, 101, 8, 11, 4, 64, 65, 64, 69, 68, 70, 84, 85, 83, 86, + 88, 111, 75, 76, 96, 69, 74, 76, 81, 86, 90, 88, 91, 93, 83, 95, + 99, 91, 91, 8, 29, 20, 10, 10, 13, 5, 1, 1, 19, 14, 32, 23, + 20, 15, 23, 14, 17, 13, 10, 14, 44, 36, 31, 27, 30, 12, 4, 2, + 64, 70, 51, 29, 6, 64, 13, 1, 73, 69, 4, 42, 31, 14, 5, 19, + 5, 3, 67, 68, 62, 74, 67, 2, 1, 0, 0, 8, 12, 3, 10, 13, + 22, 70, 65, 75, 69, 18, 74, 75, 72, 0, 2, 69, 76, 69, 77, 69, + 71, 86, 12, 11, 25, 4, 65, 11, 7, 2, 6, 5, 1, 72, 0, 66, + 70, 67, 13, 94, 76, 16, 78, 0, 70, 65, 11, 70, 72, 4, 6, 86, + 73, 93, 23, 26, 29, 12, 4, 
8, 5, 1, 65, 72, 72, 73, 85, 82, + 87, 97, 90, 96, 113, 92, 91, 89, 87, 96, 87, 94, 93, 84, 91, 6, + 4, 3, 1, 79, 72, 73, 90, 80, 87, 89, 107, 95, 100, 104, 77, 75, + 108, 75, 78, 86, 100, 91, 97, 99, 85, 99, 93, 99, 105, 105, 109, 92, + 100, 105, 72, 66, 11, 0, 25, 34, 64, 5, 14, 18, 5, 17, 32, 4, + 0, 32, 65, 93, 114, 126, 126, 126, 126, 126, 5, 39, 30, 26, 16, 28, + 15, 8, 6, 64, 73, 68, 9, 0, 21, 30, 1, 2, 10, 14, 6, 12, + 25, 7, 1, 32, 65, 93, 114, 126, 126, 126, 126, 126, 75, 69, 0, 11, + 11, 16, 43}, + + { + + 28, 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 80, 85, 23, 59, 12, + 18, 70, 7, 17, 64, 72, 1, 79, 99, 69, 100, 126, 126, 126, 49, 0, + 66, 7, 17, 64, 84, 6, 11, 0, 64, 69, 71, 64, 80, 79, 92, 5, + 66, 70, 4, 73, 68, 82, 0, 74, 72, 81, 4, 2, 22, 0, 0, 0, + 3, 96, 97, 9, 73, 69, 16, 68, 85, 34, 17, 10, 47, 53, 21, 24, + 12, 24, 6, 6, 26, 80, 76, 76, 82, 24, 65, 17, 24, 68, 74, 68, + 14, 1, 73, 72, 81, 18, 68, 7, 65, 70, 5, 66, 6, 7, 6, 11, + 18, 10, 1, 7, 62, 62, 61, 51, 7, 65, 2, 11, 64, 11, 11, 3, + 69, 29, 67, 100, 97, 46, 9, 15, 5, 6, 10, 16, 42, 24, 11, 64, + 1, 2, 17, 21, 100, 2, 5, 78, 0, 70, 9, 8, 69, 19, 12, 12, + 85, 7, 75, 72, 7, 89, 8, 10, 7, 71, 87, 3, 70, 65, 70, 85, + 79, 68, 126, 62, 125, 78, 82, 87, 98, 88, 96, 114, 104, 105, 87, 108, + 81, 62, 103, 6, 8, 1, 67, 68, 67, 71, 71, 72, 86, 87, 85, 88, + 90, 113, 75, 77, 97, 70, 76, 77, 82, 87, 92, 90, 93, 94, 83, 96, + 100, 92, 91, 9, 30, 20, 10, 10, 14, 6, 2, 1, 21, 15, 32, 23, + 20, 16, 24, 15, 19, 15, 11, 15, 44, 36, 31, 28, 31, 12, 4, 2, + 0, 71, 52, 29, 5, 64, 13, 1, 73, 69, 4, 42, 30, 13, 4, 19, + 5, 3, 67, 68, 62, 73, 66, 3, 2, 2, 1, 10, 14, 4, 11, 15, + 24, 70, 65, 75, 68, 19, 74, 75, 72, 1, 2, 69, 76, 69, 78, 69, + 71, 86, 12, 11, 26, 4, 66, 11, 7, 1, 6, 5, 1, 73, 0, 66, + 70, 67, 14, 95, 77, 16, 80, 64, 71, 66, 10, 71, 73, 4, 6, 87, + 73, 95, 22, 24, 28, 10, 2, 6, 3, 64, 67, 75, 75, 76, 88, 85, + 89, 101, 94, 101, 118, 96, 95, 92, 90, 99, 89, 96, 94, 85, 92, 4, + 2, 0, 65, 82, 75, 76, 
93, 83, 90, 92, 110, 97, 102, 106, 78, 76, + 110, 77, 79, 88, 102, 93, 99, 101, 87, 101, 95, 100, 106, 106, 110, 94, + 101, 106, 72, 66, 11, 0, 26, 35, 64, 5, 15, 19, 5, 17, 32, 4, + 64, 31, 67, 96, 117, 126, 126, 126, 126, 126, 5, 39, 30, 26, 16, 29, + 15, 8, 6, 64, 73, 68, 9, 0, 21, 30, 1, 2, 10, 14, 6, 12, + 26, 7, 0, 31, 67, 96, 117, 126, 126, 126, 126, 126, 75, 70, 64, 11, + 11, 17, 45}, + + { + + 27, 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 81, 87, 22, 60, 12, + 20, 70, 8, 18, 64, 73, 1, 79, 100, 70, 102, 126, 126, 126, 52, 1, + 65, 8, 18, 64, 84, 7, 11, 0, 0, 69, 70, 64, 80, 79, 92, 5, + 66, 69, 4, 73, 68, 82, 0, 74, 72, 80, 4, 2, 22, 0, 0, 0, + 4, 96, 97, 9, 74, 69, 15, 68, 85, 36, 19, 11, 49, 55, 23, 25, + 14, 26, 7, 8, 29, 80, 76, 76, 81, 24, 65, 18, 26, 67, 73, 67, + 16, 1, 73, 72, 81, 18, 68, 7, 64, 69, 6, 65, 7, 8, 7, 12, + 20, 11, 3, 8, 62, 62, 62, 54, 8, 65, 3, 12, 64, 11, 11, 4, + 68, 32, 67, 102, 98, 49, 9, 15, 5, 6, 10, 17, 44, 25, 11, 64, + 1, 2, 18, 22, 101, 3, 5, 78, 64, 71, 8, 8, 70, 19, 12, 12, + 86, 8, 76, 72, 6, 89, 8, 10, 7, 73, 90, 2, 71, 67, 72, 87, + 81, 70, 126, 62, 126, 80, 84, 89, 101, 90, 99, 118, 108, 107, 89, 111, + 83, 62, 105, 4, 6, 64, 69, 70, 69, 73, 73, 74, 88, 89, 86, 89, + 91, 115, 75, 77, 97, 70, 77, 78, 83, 88, 93, 91, 95, 95, 83, 97, + 101, 92, 90, 10, 31, 20, 10, 10, 15, 7, 3, 2, 24, 16, 32, 23, + 20, 17, 25, 16, 21, 17, 13, 16, 45, 37, 32, 29, 32, 12, 5, 2, + 1, 71, 53, 29, 4, 64, 14, 2, 73, 68, 4, 42, 30, 12, 3, 19, + 5, 3, 67, 67, 62, 72, 65, 5, 4, 4, 2, 12, 16, 5, 13, 17, + 26, 69, 64, 74, 67, 21, 73, 74, 71, 2, 3, 69, 76, 69, 79, 69, + 71, 86, 12, 12, 27, 5, 66, 12, 7, 1, 6, 5, 1, 74, 0, 65, + 69, 67, 15, 95, 78, 16, 81, 65, 71, 66, 10, 71, 74, 4, 6, 88, + 73, 96, 21, 23, 28, 9, 0, 4, 1, 66, 69, 77, 78, 79, 91, 88, + 91, 105, 98, 105, 123, 99, 98, 95, 92, 101, 90, 97, 95, 85, 93, 2, + 0, 65, 67, 84, 77, 78, 96, 85, 92, 94, 112, 99, 104, 108, 78, 77, + 111, 78, 80, 90, 104, 94, 100, 103, 88, 103, 96, 100, 107, 106, 111, 
96, + 102, 107, 72, 65, 12, 0, 27, 37, 0, 6, 16, 20, 5, 18, 33, 4, + 64, 30, 69, 98, 120, 126, 126, 126, 126, 126, 5, 39, 30, 27, 17, 30, + 16, 9, 7, 64, 73, 68, 9, 1, 22, 31, 1, 3, 10, 14, 6, 13, + 27, 7, 0, 30, 69, 98, 120, 126, 126, 126, 126, 126, 75, 70, 64, 12, + 12, 18, 47}, + + { + + 26, 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 82, 89, 21, 60, 12, + 22, 70, 8, 19, 65, 73, 1, 80, 102, 71, 105, 126, 126, 126, 55, 2, + 65, 8, 19, 65, 84, 8, 11, 64, 0, 68, 69, 64, 80, 79, 92, 5, + 66, 69, 4, 73, 67, 81, 0, 74, 72, 80, 5, 2, 22, 0, 0, 0, + 4, 96, 97, 10, 75, 69, 14, 69, 85, 39, 20, 12, 51, 57, 24, 27, + 15, 27, 9, 9, 31, 80, 75, 75, 81, 24, 65, 19, 28, 67, 72, 67, + 18, 1, 74, 73, 82, 18, 68, 8, 64, 68, 7, 65, 8, 8, 8, 14, + 21, 12, 4, 9, 62, 62, 62, 57, 8, 64, 3, 12, 64, 11, 12, 4, + 68, 34, 67, 104, 100, 52, 9, 16, 5, 6, 10, 17, 45, 26, 12, 65, + 1, 2, 18, 22, 102, 3, 5, 79, 64, 72, 8, 7, 70, 19, 12, 12, + 87, 8, 77, 73, 6, 89, 8, 9, 6, 75, 93, 1, 73, 69, 74, 89, + 84, 72, 126, 62, 126, 82, 86, 92, 104, 93, 102, 123, 112, 110, 91, 115, + 85, 62, 106, 2, 4, 66, 71, 72, 71, 75, 75, 76, 91, 91, 88, 91, + 93, 117, 75, 77, 98, 71, 78, 79, 85, 89, 94, 92, 96, 96, 84, 98, + 101, 93, 89, 12, 32, 21, 10, 11, 15, 7, 3, 3, 26, 18, 32, 24, + 21, 17, 26, 17, 22, 18, 15, 16, 46, 38, 33, 29, 33, 13, 5, 3, + 1, 71, 53, 29, 4, 65, 14, 2, 73, 68, 4, 41, 29, 11, 2, 19, + 5, 4, 67, 67, 62, 70, 0, 6, 6, 5, 3, 13, 17, 6, 15, 18, + 28, 68, 0, 74, 66, 23, 73, 74, 71, 2, 3, 69, 77, 69, 79, 69, + 71, 87, 13, 12, 28, 5, 66, 12, 7, 1, 6, 5, 1, 74, 0, 65, + 69, 67, 15, 96, 79, 16, 82, 65, 72, 67, 10, 72, 75, 4, 6, 89, + 73, 97, 20, 22, 27, 7, 65, 2, 64, 69, 72, 80, 81, 82, 94, 90, + 93, 110, 102, 109, 126, 102, 101, 97, 94, 104, 92, 99, 97, 86, 93, 64, + 66, 68, 69, 87, 80, 81, 99, 87, 95, 96, 115, 101, 106, 109, 79, 77, + 113, 79, 82, 91, 106, 96, 102, 104, 89, 104, 97, 101, 109, 107, 111, 97, + 104, 109, 71, 65, 13, 1, 28, 38, 0, 7, 16, 20, 5, 18, 34, 4, + 64, 29, 71, 101, 123, 126, 126, 
126, 126, 126, 6, 39, 30, 27, 17, 30, + 16, 9, 7, 0, 73, 68, 10, 1, 23, 32, 1, 3, 11, 15, 6, 13, + 27, 7, 0, 29, 71, 101, 123, 126, 126, 126, 126, 126, 75, 70, 64, 12, + 12, 20, 49}, + + { + + 25, 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 84, 91, 20, 60, 12, + 23, 70, 9, 19, 65, 74, 2, 80, 103, 73, 107, 126, 126, 126, 57, 3, + 65, 9, 19, 65, 85, 8, 11, 64, 0, 68, 69, 64, 80, 78, 91, 5, + 65, 68, 4, 72, 67, 81, 0, 74, 72, 80, 5, 2, 22, 0, 0, 0, + 5, 96, 97, 10, 75, 70, 14, 69, 85, 41, 21, 13, 52, 60, 26, 28, + 17, 29, 10, 10, 33, 80, 75, 75, 81, 24, 65, 20, 31, 67, 71, 66, + 20, 0, 74, 73, 82, 19, 68, 8, 0, 68, 7, 64, 9, 9, 9, 15, + 23, 12, 5, 10, 62, 62, 62, 60, 8, 64, 3, 13, 64, 11, 12, 5, + 67, 36, 66, 106, 102, 55, 9, 16, 5, 6, 11, 18, 47, 27, 12, 65, + 1, 2, 19, 23, 103, 3, 5, 79, 65, 73, 7, 7, 71, 19, 12, 12, + 89, 8, 78, 74, 5, 89, 8, 9, 6, 77, 96, 0, 75, 72, 77, 91, + 86, 74, 126, 62, 126, 84, 87, 94, 108, 95, 105, 126, 117, 113, 93, 118, + 87, 62, 108, 0, 2, 68, 74, 75, 73, 78, 77, 78, 93, 94, 90, 93, + 94, 118, 75, 77, 98, 72, 79, 80, 86, 90, 95, 93, 98, 97, 84, 99, + 102, 93, 89, 13, 33, 21, 10, 11, 16, 8, 4, 3, 28, 19, 33, 24, + 21, 18, 27, 18, 24, 20, 17, 17, 46, 38, 33, 30, 34, 13, 5, 3, + 2, 71, 54, 29, 3, 65, 14, 2, 73, 67, 3, 41, 29, 10, 1, 18, + 5, 4, 67, 67, 62, 69, 1, 7, 7, 7, 4, 15, 19, 7, 16, 20, + 30, 68, 1, 73, 65, 25, 72, 74, 71, 3, 4, 69, 77, 69, 80, 69, + 71, 87, 13, 13, 29, 5, 66, 13, 8, 1, 7, 6, 1, 75, 0, 65, + 69, 67, 16, 97, 80, 16, 83, 66, 73, 67, 10, 73, 75, 4, 5, 91, + 74, 98, 18, 21, 26, 6, 67, 0, 66, 71, 74, 82, 84, 84, 97, 93, + 95, 114, 106, 113, 126, 106, 104, 100, 96, 106, 94, 101, 98, 87, 94, 66, + 68, 70, 72, 90, 82, 83, 102, 90, 97, 99, 118, 103, 107, 111, 80, 78, + 114, 81, 83, 93, 108, 97, 104, 106, 90, 106, 98, 102, 110, 108, 112, 99, + 105, 110, 71, 64, 13, 1, 29, 39, 0, 7, 17, 21, 5, 19, 35, 4, + 64, 28, 73, 103, 126, 126, 126, 126, 126, 126, 6, 39, 30, 27, 17, 31, + 17, 9, 7, 0, 72, 67, 10, 2, 24, 33, 2, 4, 11, 15, 6, 13, 
+ 28, 7, 0, 28, 73, 103, 126, 126, 126, 126, 126, 126, 75, 70, 64, 13, + 13, 21, 52}, + + { + + 23, 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 85, 93, 19, 60, 11, + 25, 70, 9, 20, 65, 74, 2, 81, 105, 74, 110, 126, 126, 126, 60, 4, + 65, 9, 20, 65, 85, 9, 11, 65, 0, 68, 68, 64, 80, 78, 91, 5, + 65, 68, 4, 72, 66, 81, 0, 74, 72, 80, 5, 2, 22, 0, 0, 0, + 5, 97, 97, 11, 76, 70, 13, 70, 85, 44, 22, 14, 54, 62, 27, 30, + 19, 30, 11, 11, 35, 80, 75, 74, 81, 24, 65, 21, 33, 67, 71, 66, + 22, 0, 75, 74, 83, 19, 68, 9, 0, 67, 8, 64, 10, 9, 9, 17, + 24, 13, 6, 11, 62, 62, 62, 62, 8, 64, 3, 13, 64, 11, 13, 5, + 67, 38, 66, 108, 104, 57, 9, 16, 5, 6, 11, 18, 48, 28, 12, 65, + 1, 2, 19, 24, 104, 3, 5, 80, 65, 74, 7, 7, 71, 19, 12, 12, + 90, 8, 79, 75, 5, 89, 7, 8, 5, 79, 100, 64, 77, 74, 79, 93, + 89, 76, 126, 62, 126, 86, 89, 96, 111, 98, 109, 126, 121, 116, 95, 122, + 89, 62, 110, 65, 0, 71, 76, 77, 75, 80, 79, 80, 95, 96, 92, 95, + 96, 120, 75, 77, 99, 73, 80, 81, 87, 91, 97, 94, 100, 98, 85, 100, + 103, 94, 88, 15, 34, 22, 10, 11, 17, 8, 4, 4, 30, 20, 33, 24, + 22, 19, 28, 19, 25, 21, 18, 18, 47, 39, 34, 30, 35, 14, 5, 3, + 2, 71, 54, 29, 2, 65, 14, 2, 73, 67, 3, 40, 28, 9, 0, 18, + 5, 4, 67, 67, 62, 68, 3, 8, 9, 8, 5, 17, 21, 8, 18, 22, + 32, 67, 2, 73, 64, 26, 72, 74, 71, 3, 4, 69, 77, 69, 80, 69, + 71, 88, 14, 13, 30, 5, 66, 13, 8, 1, 7, 6, 1, 75, 0, 65, + 69, 67, 16, 98, 81, 16, 84, 67, 74, 68, 10, 74, 76, 4, 5, 92, + 74, 99, 17, 20, 25, 4, 69, 65, 68, 73, 77, 85, 87, 87, 100, 95, + 97, 118, 110, 117, 126, 109, 108, 102, 99, 109, 96, 103, 100, 88, 95, 68, + 71, 73, 74, 93, 85, 86, 105, 92, 100, 101, 121, 105, 109, 112, 81, 79, + 116, 82, 85, 95, 110, 99, 106, 108, 91, 107, 99, 103, 111, 109, 113, 100, + 106, 111, 71, 64, 14, 2, 30, 40, 0, 8, 17, 21, 5, 19, 36, 4, + 64, 27, 75, 106, 126, 126, 126, 126, 126, 126, 6, 39, 30, 27, 17, 31, + 17, 9, 7, 1, 72, 67, 11, 2, 24, 34, 2, 4, 11, 15, 6, 13, + 28, 7, 0, 27, 75, 106, 126, 126, 126, 126, 126, 126, 75, 70, 64, 13, + 13, 23, 54}, 
+ + { + + 22, 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 86, 95, 18, 60, 11, + 27, 70, 9, 21, 66, 75, 2, 82, 107, 75, 112, 126, 126, 126, 62, 5, + 64, 9, 21, 66, 85, 10, 11, 65, 0, 67, 67, 64, 80, 78, 91, 5, + 65, 68, 4, 72, 66, 80, 0, 74, 72, 80, 6, 2, 22, 0, 0, 0, + 6, 97, 97, 11, 77, 70, 12, 70, 85, 46, 23, 15, 56, 62, 29, 31, + 20, 32, 13, 12, 38, 80, 74, 74, 80, 24, 65, 22, 35, 67, 70, 65, + 24, 0, 75, 74, 83, 19, 68, 9, 1, 66, 9, 0, 11, 10, 10, 18, + 26, 14, 8, 12, 62, 62, 62, 62, 9, 0, 4, 14, 64, 11, 13, 5, + 66, 40, 66, 110, 106, 60, 9, 17, 5, 6, 11, 19, 50, 29, 13, 66, + 1, 2, 20, 24, 105, 4, 5, 80, 66, 75, 6, 6, 72, 19, 12, 12, + 91, 9, 80, 75, 4, 89, 7, 7, 4, 81, 103, 65, 78, 76, 81, 95, + 91, 78, 126, 62, 126, 88, 91, 99, 114, 100, 112, 126, 125, 118, 97, 125, + 91, 62, 111, 67, 65, 73, 78, 79, 77, 82, 81, 82, 98, 98, 94, 96, + 98, 122, 75, 77, 99, 74, 81, 82, 89, 92, 98, 95, 101, 99, 85, 101, + 103, 95, 87, 16, 35, 22, 10, 12, 17, 9, 5, 5, 32, 22, 33, 25, + 22, 19, 29, 20, 27, 23, 20, 18, 48, 40, 35, 31, 36, 14, 6, 4, + 3, 71, 55, 29, 2, 66, 14, 2, 73, 66, 3, 40, 27, 8, 64, 18, + 5, 5, 67, 66, 62, 66, 4, 10, 11, 10, 6, 18, 22, 9, 20, 23, + 34, 66, 3, 72, 0, 28, 72, 74, 70, 4, 5, 69, 78, 69, 81, 69, + 71, 88, 14, 13, 31, 5, 66, 13, 8, 1, 7, 6, 1, 76, 0, 65, + 69, 67, 17, 99, 82, 16, 85, 67, 74, 69, 10, 74, 77, 4, 5, 93, + 74, 100, 16, 19, 24, 2, 71, 67, 70, 76, 79, 88, 90, 90, 103, 98, + 99, 123, 114, 121, 126, 112, 111, 105, 101, 111, 98, 104, 101, 88, 95, 71, + 73, 75, 76, 96, 88, 88, 108, 94, 102, 103, 124, 107, 111, 114, 81, 79, + 118, 83, 86, 96, 112, 101, 108, 109, 92, 109, 100, 103, 113, 110, 113, 102, + 108, 113, 70, 64, 15, 2, 31, 41, 1, 9, 18, 22, 5, 20, 37, 4, + 64, 26, 77, 109, 126, 126, 126, 126, 126, 126, 7, 39, 30, 27, 17, 32, + 17, 10, 8, 1, 72, 67, 11, 2, 25, 35, 2, 4, 12, 16, 6, 14, + 29, 7, 0, 26, 77, 109, 126, 126, 126, 126, 126, 126, 75, 70, 64, 13, + 13, 24, 56}, + + { + + 21, 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 88, 97, 17, 60, 11, + 
29, 70, 10, 22, 66, 75, 2, 82, 108, 76, 115, 126, 126, 126, 62, 6, + 64, 10, 22, 66, 86, 11, 11, 66, 0, 67, 67, 64, 80, 78, 91, 5, + 65, 67, 4, 72, 65, 80, 0, 74, 72, 80, 6, 2, 22, 0, 0, 0, + 6, 97, 97, 12, 78, 70, 11, 71, 85, 49, 24, 16, 57, 62, 30, 33, + 22, 33, 14, 13, 40, 80, 74, 73, 80, 24, 65, 23, 37, 67, 69, 65, + 26, 0, 76, 75, 84, 19, 68, 10, 1, 65, 10, 0, 12, 10, 11, 20, + 27, 14, 9, 13, 62, 62, 62, 62, 9, 0, 4, 14, 64, 11, 14, 6, + 66, 42, 66, 112, 108, 62, 9, 17, 5, 6, 11, 19, 51, 30, 13, 66, + 1, 2, 20, 25, 106, 4, 5, 81, 66, 76, 6, 6, 72, 19, 12, 12, + 92, 9, 81, 76, 4, 89, 7, 7, 4, 83, 106, 66, 80, 78, 84, 97, + 94, 80, 126, 62, 126, 90, 93, 101, 117, 103, 115, 126, 126, 121, 99, 126, + 93, 62, 113, 69, 67, 75, 81, 82, 79, 84, 83, 84, 100, 100, 96, 98, + 99, 124, 75, 77, 100, 75, 82, 83, 90, 93, 99, 96, 103, 100, 86, 102, + 104, 95, 87, 18, 36, 23, 10, 12, 18, 9, 5, 5, 34, 23, 33, 25, + 23, 20, 30, 21, 28, 24, 22, 19, 48, 40, 35, 31, 37, 15, 6, 4, + 3, 71, 55, 29, 1, 66, 14, 2, 73, 66, 3, 39, 27, 7, 65, 18, + 5, 5, 67, 66, 62, 65, 6, 11, 12, 11, 7, 20, 24, 10, 21, 25, + 36, 66, 4, 72, 1, 30, 71, 74, 70, 4, 5, 69, 78, 69, 81, 69, + 71, 89, 15, 14, 32, 5, 66, 14, 8, 1, 7, 6, 1, 76, 0, 65, + 69, 67, 17, 100, 83, 16, 86, 68, 75, 69, 10, 75, 78, 4, 5, 94, + 74, 101, 15, 18, 23, 1, 73, 69, 72, 78, 82, 90, 93, 93, 106, 100, + 101, 126, 118, 125, 126, 116, 114, 107, 103, 114, 100, 106, 103, 89, 96, 73, + 76, 78, 79, 99, 90, 91, 111, 97, 105, 106, 126, 109, 113, 115, 82, 80, + 119, 85, 88, 98, 114, 102, 110, 111, 93, 110, 101, 104, 114, 111, 114, 103, + 109, 114, 70, 0, 15, 3, 32, 42, 1, 9, 18, 22, 5, 20, 38, 4, + 64, 25, 79, 111, 126, 126, 126, 126, 126, 126, 7, 39, 30, 27, 17, 32, + 18, 10, 8, 2, 72, 67, 12, 3, 26, 36, 2, 5, 12, 16, 6, 14, + 29, 7, 0, 25, 79, 111, 126, 126, 126, 126, 126, 126, 75, 70, 64, 14, + 14, 26, 58}, + + { + + 20, 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 89, 99, 16, 60, 11, + 31, 70, 10, 23, 66, 76, 2, 83, 110, 77, 117, 126, 126, 126, 
62, 7, + 64, 10, 23, 66, 86, 12, 11, 66, 0, 67, 66, 64, 80, 78, 91, 5, + 65, 67, 4, 72, 65, 80, 0, 74, 72, 80, 6, 2, 22, 0, 0, 0, + 7, 97, 97, 12, 79, 70, 10, 71, 85, 51, 25, 17, 59, 62, 32, 34, + 24, 35, 15, 14, 42, 80, 74, 73, 80, 24, 65, 24, 39, 67, 68, 64, + 28, 0, 76, 75, 84, 19, 68, 10, 2, 64, 11, 1, 13, 11, 12, 21, + 29, 15, 10, 14, 62, 62, 62, 62, 9, 0, 4, 15, 64, 11, 14, 6, + 65, 44, 66, 114, 110, 62, 9, 17, 5, 6, 11, 20, 53, 31, 13, 66, + 1, 2, 21, 26, 107, 4, 5, 81, 67, 77, 5, 6, 73, 19, 12, 12, + 93, 9, 82, 77, 3, 89, 7, 6, 3, 85, 109, 67, 82, 80, 86, 99, + 96, 82, 126, 62, 126, 92, 95, 103, 120, 105, 118, 126, 126, 124, 101, 126, + 95, 62, 115, 71, 69, 77, 83, 84, 81, 86, 85, 86, 102, 102, 98, 100, + 101, 126, 75, 77, 100, 76, 83, 84, 91, 94, 100, 97, 105, 101, 86, 103, + 105, 96, 86, 19, 37, 23, 10, 12, 19, 10, 6, 6, 36, 24, 33, 25, + 23, 21, 31, 22, 30, 26, 24, 20, 49, 41, 36, 32, 38, 15, 6, 4, + 4, 71, 56, 29, 0, 66, 14, 2, 73, 65, 3, 39, 26, 6, 66, 18, + 5, 5, 67, 66, 62, 64, 7, 12, 14, 13, 8, 22, 26, 11, 23, 27, + 38, 65, 5, 71, 2, 32, 71, 74, 70, 5, 6, 69, 78, 69, 82, 69, + 71, 89, 15, 14, 33, 5, 66, 14, 8, 1, 7, 6, 1, 77, 0, 65, + 69, 67, 18, 101, 84, 16, 87, 69, 76, 70, 10, 76, 79, 4, 5, 95, + 74, 102, 14, 17, 22, 64, 75, 71, 74, 80, 84, 93, 96, 96, 109, 103, + 103, 126, 122, 126, 126, 119, 117, 110, 105, 116, 102, 108, 104, 90, 97, 75, + 78, 80, 81, 102, 93, 93, 114, 99, 107, 108, 126, 111, 115, 117, 83, 81, + 121, 86, 89, 100, 116, 104, 112, 113, 94, 112, 102, 105, 115, 112, 115, 105, + 110, 115, 70, 0, 16, 3, 33, 43, 1, 10, 19, 23, 5, 21, 39, 4, + 64, 24, 81, 114, 126, 126, 126, 126, 126, 126, 7, 39, 30, 27, 17, 33, + 18, 10, 8, 2, 72, 67, 12, 3, 27, 37, 2, 5, 12, 16, 6, 14, + 30, 7, 0, 24, 81, 114, 126, 126, 126, 126, 126, 126, 75, 70, 64, 14, + 14, 27, 60}, + + { + + 18, 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 91, 102, 15, 60, 10, + 32, 71, 10, 23, 67, 77, 2, 84, 112, 79, 120, 126, 126, 126, 62, 7, + 64, 10, 23, 67, 87, 12, 11, 67, 0, 67, 
66, 65, 81, 78, 91, 4, + 65, 67, 4, 72, 65, 80, 0, 74, 73, 80, 6, 2, 22, 0, 0, 0, + 7, 98, 97, 12, 80, 71, 9, 72, 86, 53, 26, 18, 60, 62, 33, 35, + 25, 36, 16, 15, 44, 80, 74, 73, 80, 24, 65, 24, 41, 67, 68, 64, + 29, 64, 77, 76, 85, 19, 68, 10, 2, 64, 11, 1, 13, 11, 12, 22, + 30, 15, 11, 15, 62, 62, 62, 62, 9, 0, 4, 15, 65, 11, 14, 6, + 65, 46, 66, 116, 112, 62, 9, 17, 5, 6, 11, 20, 54, 31, 13, 67, + 0, 2, 21, 26, 109, 4, 5, 82, 68, 79, 4, 5, 74, 19, 12, 11, + 95, 9, 83, 78, 2, 89, 6, 5, 2, 88, 113, 69, 84, 83, 89, 102, + 99, 84, 126, 62, 126, 95, 97, 106, 124, 108, 122, 126, 126, 126, 103, 126, + 97, 62, 117, 74, 72, 80, 86, 87, 84, 89, 88, 88, 105, 105, 100, 102, + 103, 126, 75, 78, 101, 77, 85, 86, 93, 96, 102, 99, 107, 102, 87, 104, + 106, 97, 86, 20, 37, 23, 10, 12, 19, 10, 6, 6, 38, 25, 33, 25, + 23, 21, 31, 23, 31, 27, 25, 20, 49, 41, 36, 32, 39, 15, 6, 4, + 4, 72, 56, 28, 64, 67, 14, 2, 73, 65, 2, 38, 25, 4, 67, 17, + 5, 5, 67, 66, 62, 0, 8, 13, 15, 14, 9, 23, 27, 12, 24, 28, + 40, 65, 5, 71, 3, 33, 71, 74, 70, 5, 6, 69, 79, 70, 83, 69, + 72, 90, 15, 14, 34, 5, 67, 14, 8, 0, 7, 6, 1, 78, 0, 65, + 69, 67, 18, 102, 85, 16, 89, 70, 77, 71, 9, 77, 80, 4, 4, 97, + 75, 104, 12, 15, 21, 66, 77, 74, 77, 83, 87, 96, 99, 99, 113, 106, + 105, 126, 126, 126, 126, 123, 121, 113, 108, 119, 104, 110, 106, 91, 98, 78, + 81, 83, 84, 105, 96, 96, 118, 102, 110, 111, 126, 113, 117, 119, 84, 82, + 123, 88, 91, 102, 119, 106, 114, 115, 96, 114, 104, 106, 117, 113, 116, 107, + 112, 117, 70, 0, 16, 3, 34, 44, 1, 10, 19, 23, 5, 21, 39, 4, + 65, 22, 83, 117, 126, 126, 126, 126, 126, 126, 7, 39, 30, 27, 17, 33, + 18, 10, 8, 2, 72, 67, 12, 3, 27, 37, 2, 5, 12, 16, 6, 14, + 30, 6, 64, 22, 83, 117, 126, 126, 126, 126, 126, 126, 75, 71, 65, 14, + 14, 28, 62}, + + { + + 17, 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 92, 104, 14, 61, 10, + 34, 71, 11, 24, 67, 77, 3, 84, 113, 80, 122, 126, 126, 126, 62, 8, + 0, 11, 24, 67, 87, 13, 11, 67, 1, 66, 65, 65, 81, 77, 90, 4, + 64, 66, 4, 71, 64, 79, 
1, 73, 73, 79, 7, 2, 22, 0, 0, 0, + 8, 98, 97, 13, 80, 71, 9, 72, 86, 56, 28, 20, 62, 62, 35, 37, + 27, 38, 18, 17, 47, 80, 73, 72, 79, 24, 65, 25, 44, 66, 67, 0, + 31, 64, 77, 76, 85, 20, 68, 11, 3, 0, 12, 2, 14, 12, 13, 24, + 32, 16, 13, 17, 62, 62, 62, 62, 10, 1, 5, 16, 65, 12, 15, 7, + 64, 49, 65, 118, 113, 62, 9, 18, 5, 7, 12, 21, 56, 32, 14, 67, + 0, 2, 22, 27, 110, 5, 5, 82, 68, 80, 4, 5, 74, 19, 12, 11, + 96, 10, 83, 78, 2, 89, 6, 5, 2, 90, 116, 70, 85, 85, 91, 104, + 101, 86, 126, 62, 126, 97, 98, 108, 126, 110, 125, 126, 126, 126, 105, 126, + 99, 62, 118, 76, 74, 82, 88, 89, 86, 91, 90, 90, 107, 107, 101, 103, + 104, 126, 75, 78, 101, 77, 86, 87, 94, 97, 103, 100, 108, 103, 87, 105, + 106, 97, 85, 22, 38, 24, 10, 13, 20, 11, 7, 7, 41, 27, 34, 26, + 24, 22, 32, 25, 33, 29, 27, 21, 50, 42, 37, 33, 40, 16, 7, 5, + 5, 72, 57, 28, 64, 67, 15, 3, 73, 64, 2, 38, 25, 3, 68, 17, + 6, 6, 66, 65, 62, 2, 10, 15, 17, 16, 11, 25, 29, 14, 26, 30, + 43, 64, 6, 70, 5, 35, 70, 73, 69, 6, 7, 68, 79, 70, 83, 69, + 72, 90, 16, 15, 35, 6, 67, 15, 9, 0, 8, 7, 1, 78, 1, 64, + 68, 66, 19, 102, 86, 16, 90, 70, 77, 71, 9, 77, 80, 4, 4, 98, + 75, 105, 11, 14, 21, 67, 78, 76, 79, 85, 89, 98, 101, 101, 116, 108, + 107, 126, 126, 126, 126, 126, 124, 115, 110, 121, 105, 111, 107, 91, 98, 80, + 83, 85, 86, 107, 98, 98, 121, 104, 112, 113, 126, 114, 118, 120, 84, 82, + 124, 89, 92, 103, 121, 107, 115, 116, 97, 115, 105, 106, 118, 113, 116, 108, + 113, 118, 69, 1, 17, 4, 36, 46, 2, 11, 20, 24, 6, 22, 40, 4, + 65, 21, 85, 119, 126, 126, 126, 126, 126, 126, 8, 39, 31, 28, 18, 34, + 19, 11, 9, 3, 71, 66, 13, 4, 28, 38, 3, 6, 13, 17, 6, 15, + 31, 6, 64, 21, 85, 119, 126, 126, 126, 126, 126, 126, 75, 71, 65, 15, + 15, 30, 62}, + + { + + 16, 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 93, 106, 13, 61, 10, + 36, 71, 11, 25, 67, 78, 3, 85, 115, 81, 125, 126, 126, 126, 62, 9, + 0, 11, 25, 67, 87, 14, 11, 68, 1, 66, 64, 65, 81, 77, 90, 4, + 64, 66, 4, 71, 64, 79, 1, 73, 73, 79, 7, 2, 22, 0, 0, 0, + 9, 98, 
97, 13, 81, 71, 8, 72, 86, 58, 29, 21, 62, 62, 36, 38, + 29, 39, 19, 18, 49, 80, 73, 72, 79, 24, 65, 26, 46, 66, 66, 1, + 33, 64, 77, 76, 86, 20, 68, 11, 4, 1, 13, 3, 15, 12, 14, 25, + 33, 17, 14, 18, 62, 62, 62, 62, 10, 1, 5, 16, 65, 12, 15, 7, + 64, 51, 65, 120, 115, 62, 9, 18, 5, 7, 12, 21, 58, 33, 14, 67, + 0, 2, 23, 28, 111, 5, 5, 82, 69, 81, 3, 5, 75, 19, 12, 11, + 97, 10, 84, 79, 1, 89, 6, 4, 1, 92, 119, 71, 87, 87, 93, 106, + 103, 88, 126, 62, 126, 99, 100, 110, 126, 112, 126, 126, 126, 126, 107, 126, + 101, 62, 120, 78, 76, 84, 90, 91, 88, 93, 92, 92, 109, 109, 103, 105, + 106, 126, 75, 78, 102, 78, 87, 88, 95, 98, 104, 101, 110, 104, 87, 106, + 107, 98, 84, 23, 39, 24, 10, 13, 21, 12, 8, 8, 43, 28, 34, 26, + 24, 23, 33, 26, 35, 31, 29, 22, 51, 43, 38, 34, 41, 16, 7, 5, + 6, 72, 58, 28, 65, 67, 15, 3, 73, 64, 2, 38, 24, 2, 69, 17, + 6, 6, 66, 65, 62, 3, 11, 16, 19, 18, 12, 27, 31, 15, 28, 32, + 45, 0, 7, 70, 6, 37, 70, 73, 69, 7, 7, 68, 79, 70, 84, 69, + 72, 90, 16, 15, 36, 6, 67, 15, 9, 0, 8, 7, 1, 79, 1, 64, + 68, 66, 20, 103, 87, 16, 91, 71, 78, 72, 9, 78, 81, 4, 4, 99, + 75, 106, 10, 13, 20, 69, 80, 78, 81, 87, 91, 101, 104, 104, 119, 111, + 109, 126, 126, 126, 126, 126, 126, 118, 112, 124, 107, 113, 108, 92, 99, 82, + 85, 88, 88, 110, 101, 101, 124, 106, 115, 115, 126, 116, 120, 122, 85, 83, + 126, 90, 93, 105, 123, 109, 117, 118, 98, 117, 106, 107, 119, 114, 117, 110, + 114, 119, 69, 1, 18, 4, 37, 47, 2, 12, 21, 25, 6, 22, 41, 4, + 65, 20, 87, 122, 126, 126, 126, 126, 126, 126, 8, 39, 31, 28, 18, 35, + 19, 11, 9, 3, 71, 66, 13, 4, 29, 39, 3, 6, 13, 17, 6, 15, + 32, 6, 64, 20, 87, 122, 126, 126, 126, 126, 126, 126, 75, 71, 65, 15, + 15, 31, 62}, + + { + + 15, 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 95, 108, 12, 61, 10, + 38, 71, 12, 26, 67, 78, 3, 85, 116, 82, 126, 126, 126, 126, 62, 10, + 0, 12, 26, 67, 88, 15, 11, 68, 1, 66, 64, 65, 81, 77, 90, 4, + 64, 65, 4, 71, 0, 79, 1, 73, 73, 79, 7, 2, 22, 0, 0, 0, + 9, 98, 97, 14, 82, 71, 7, 73, 86, 61, 30, 22, 
62, 62, 38, 40, + 31, 41, 20, 19, 51, 80, 73, 71, 79, 24, 65, 27, 48, 66, 65, 1, + 35, 64, 78, 77, 86, 20, 68, 12, 4, 2, 14, 3, 16, 13, 15, 27, + 35, 17, 15, 19, 62, 62, 62, 62, 10, 1, 5, 17, 65, 12, 16, 8, + 0, 53, 65, 122, 117, 62, 9, 18, 5, 7, 12, 22, 59, 34, 14, 67, + 0, 2, 23, 29, 112, 5, 5, 83, 69, 82, 3, 5, 75, 19, 12, 11, + 98, 10, 85, 80, 1, 89, 6, 4, 1, 94, 122, 72, 89, 89, 96, 108, + 106, 90, 126, 62, 126, 101, 102, 112, 126, 115, 126, 126, 126, 126, 109, 126, + 103, 62, 122, 80, 78, 86, 93, 94, 90, 95, 94, 94, 111, 111, 105, 107, + 107, 126, 75, 78, 102, 79, 88, 89, 96, 99, 105, 102, 112, 105, 88, 107, + 108, 98, 84, 25, 40, 25, 10, 13, 22, 12, 8, 8, 45, 29, 34, 26, + 25, 24, 34, 27, 36, 32, 31, 23, 51, 43, 38, 34, 42, 17, 7, 5, + 6, 72, 58, 28, 66, 67, 15, 3, 73, 0, 2, 37, 24, 1, 70, 17, + 6, 6, 66, 65, 62, 4, 13, 17, 20, 19, 13, 29, 33, 16, 29, 34, + 47, 0, 8, 69, 7, 39, 69, 73, 69, 7, 8, 68, 79, 70, 84, 69, + 72, 91, 17, 16, 37, 6, 67, 16, 9, 0, 8, 7, 1, 79, 1, 64, + 68, 66, 20, 104, 88, 16, 92, 72, 79, 72, 9, 79, 82, 4, 4, 100, + 75, 107, 9, 12, 19, 70, 82, 80, 83, 89, 94, 103, 107, 107, 122, 113, + 111, 126, 126, 126, 126, 126, 126, 120, 114, 126, 109, 115, 110, 93, 100, 84, + 88, 90, 91, 113, 103, 103, 126, 109, 117, 118, 126, 118, 122, 123, 86, 84, + 126, 92, 95, 107, 125, 110, 119, 120, 99, 118, 107, 108, 120, 115, 118, 111, + 115, 120, 69, 2, 18, 5, 38, 48, 2, 12, 21, 25, 6, 23, 42, 4, + 65, 19, 89, 124, 126, 126, 126, 126, 126, 126, 8, 39, 31, 28, 18, 35, + 20, 11, 9, 4, 71, 66, 14, 5, 30, 40, 3, 7, 13, 17, 6, 15, + 32, 6, 64, 19, 89, 124, 126, 126, 126, 126, 126, 126, 75, 71, 65, 16, + 16, 33, 62}, + + }, + + { + + { + + 62, 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 47, 62, 62, 12, 1, + 99, 47, 85, 102, 6, 6, 73, 6, 23, 53, 62, 62, 21, 97, 126, 117, + 74, 85, 102, 6, 93, 88, 19, 8, 89, 103, 116, 6, 5, 84, 96, 0, + 85, 106, 0, 75, 90, 101, 8, 79, 75, 97, 13, 3, 22, 0, 0, 0, + 83, 86, 97, 72, 22, 1, 29, 88, 126, 126, 91, 95, 84, 86, 89, 91, + 126, 
76, 103, 90, 126, 80, 76, 84, 78, 8, 2, 83, 126, 79, 104, 91, + 126, 65, 79, 72, 92, 7, 68, 71, 98, 86, 88, 82, 72, 67, 72, 89, + 69, 4, 66, 6, 71, 71, 5, 74, 19, 69, 1, 12, 16, 21, 22, 10, + 76, 78, 83, 11, 67, 90, 67, 72, 75, 80, 83, 64, 32, 64, 94, 75, + 0, 74, 28, 36, 91, 65, 69, 77, 66, 1, 68, 81, 33, 56, 40, 74, + 66, 124, 26, 62, 62, 126, 24, 21, 29, 34, 32, 26, 21, 23, 30, 20, + 27, 16, 8, 5, 3, 19, 19, 21, 15, 7, 11, 26, 14, 5, 15, 18, + 69, 30, 0, 62, 62, 62, 53, 62, 62, 62, 62, 46, 38, 34, 30, 48, + 43, 73, 29, 32, 19, 47, 27, 27, 35, 42, 43, 51, 47, 21, 93, 7, + 6, 25, 126, 115, 82, 1, 10, 4, 85, 89, 94, 92, 126, 100, 6, 67, + 71, 77, 85, 88, 104, 98, 126, 82, 15, 2, 66, 70, 75, 79, 83, 92, + 108, 79, 69, 75, 5, 5, 78, 83, 81, 99, 81, 25, 1, 5, 4, 73, + 76, 86, 83, 87, 62, 126, 126, 120, 126, 114, 117, 118, 117, 113, 118, 120, + 124, 94, 102, 99, 106, 126, 92, 6, 86, 94, 91, 77, 71, 73, 64, 81, + 64, 6, 67, 68, 67, 68, 77, 64, 68, 78, 8, 4, 65, 9, 19, 3, + 70, 76, 86, 70, 64, 70, 8, 7, 69, 65, 74, 9, 9, 76, 82, 77, + 77, 21, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 52, 62, 62, 62, 62, 62, 62, 48, 62, 62, 46, 25, 18, 9, 79, 62, + 62, 62, 62, 48, 48, 38, 41, 47, 45, 35, 22, 35, 16, 1, 32, 37, + 39, 40, 47, 33, 34, 22, 21, 3, 11, 3, 78, 123, 10, 7, 2, 30, + 13, 2, 78, 74, 72, 72, 75, 71, 0, 70, 75, 72, 67, 10, 4, 11, + 68, 62, 62, 62, 62, 56, 51, 40, 25, 64, 71, 26, 19, 14, 7, 4, + 0, 67, 68, 79, 78, 74, 72, 72, 75, 71, 0, 70, 75, 72, 67, 10, + 4, 11, 68, 62, 62, 62, 62, 56, 51, 40, 25, 64, 75, 65, 4, 67, + 67, 104, 106}, + + { + + 62, 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 46, 62, 62, 13, 2, + 97, 46, 84, 100, 6, 6, 71, 6, 22, 52, 62, 60, 19, 97, 125, 115, + 73, 84, 100, 6, 92, 87, 20, 8, 88, 102, 114, 5, 4, 84, 96, 0, + 84, 105, 0, 75, 89, 100, 8, 78, 74, 96, 14, 3, 22, 0, 0, 0, + 82, 86, 97, 71, 22, 1, 29, 87, 125, 124, 89, 94, 82, 84, 88, 89, + 125, 75, 101, 89, 124, 80, 76, 84, 78, 9, 2, 82, 124, 78, 103, 90, + 125, 65, 78, 72, 91, 
8, 68, 70, 97, 85, 87, 81, 71, 66, 71, 88, + 68, 5, 66, 6, 70, 70, 5, 73, 20, 68, 1, 13, 17, 22, 23, 11, + 76, 77, 82, 11, 67, 89, 67, 71, 74, 79, 81, 1, 33, 1, 92, 75, + 64, 73, 29, 37, 91, 65, 68, 77, 65, 1, 67, 79, 33, 56, 41, 72, + 67, 122, 25, 62, 62, 125, 24, 21, 29, 34, 32, 26, 21, 23, 30, 20, + 27, 16, 8, 5, 3, 19, 19, 21, 15, 7, 11, 26, 14, 4, 15, 18, + 69, 29, 0, 62, 62, 62, 52, 62, 62, 62, 62, 45, 37, 32, 29, 46, + 42, 74, 28, 31, 18, 46, 27, 27, 34, 41, 42, 50, 46, 20, 93, 7, + 6, 24, 125, 113, 80, 2, 10, 4, 84, 88, 93, 91, 125, 98, 7, 66, + 70, 76, 83, 87, 102, 97, 124, 81, 16, 3, 65, 69, 74, 78, 82, 91, + 106, 78, 67, 74, 6, 5, 77, 82, 80, 98, 80, 26, 2, 6, 5, 72, + 75, 85, 82, 86, 62, 125, 125, 118, 125, 112, 115, 116, 115, 111, 116, 118, + 121, 93, 101, 98, 105, 123, 91, 5, 85, 93, 90, 76, 71, 72, 64, 80, + 64, 6, 67, 68, 66, 68, 77, 64, 68, 77, 8, 4, 65, 9, 19, 3, + 70, 75, 84, 70, 64, 69, 8, 7, 69, 65, 73, 9, 9, 75, 81, 76, + 76, 20, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 50, 62, 62, 62, 62, 62, 62, 47, 60, 60, 45, 24, 17, 9, 79, 62, + 62, 62, 60, 46, 47, 37, 39, 46, 43, 34, 20, 33, 15, 0, 31, 36, + 37, 39, 46, 32, 33, 21, 20, 2, 11, 3, 78, 122, 9, 6, 1, 29, + 12, 1, 77, 73, 71, 71, 73, 70, 1, 69, 73, 71, 66, 11, 5, 12, + 67, 62, 62, 62, 62, 54, 50, 38, 24, 65, 70, 27, 20, 15, 8, 5, + 1, 66, 67, 78, 77, 73, 71, 71, 73, 70, 1, 69, 73, 71, 66, 11, + 5, 12, 67, 62, 62, 62, 62, 54, 50, 38, 24, 65, 75, 65, 4, 66, + 66, 102, 103}, + + { + + 62, 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 44, 60, 62, 14, 2, + 95, 44, 84, 99, 6, 6, 70, 5, 21, 51, 60, 57, 17, 98, 123, 114, + 73, 84, 99, 6, 92, 86, 20, 8, 87, 101, 113, 4, 3, 84, 96, 0, + 84, 104, 0, 75, 89, 100, 8, 78, 74, 95, 14, 3, 22, 0, 0, 0, + 81, 86, 97, 71, 21, 1, 29, 86, 124, 122, 88, 93, 80, 82, 87, 88, + 123, 74, 100, 88, 122, 81, 76, 84, 78, 9, 2, 81, 122, 78, 102, 89, + 123, 65, 78, 72, 91, 8, 68, 70, 96, 85, 86, 81, 71, 66, 71, 87, + 67, 5, 66, 6, 70, 70, 5, 73, 20, 68, 1, 13, 
17, 22, 23, 11, + 77, 76, 81, 10, 67, 89, 67, 70, 74, 79, 80, 2, 34, 3, 90, 76, + 65, 73, 29, 37, 92, 65, 68, 78, 64, 1, 67, 78, 33, 56, 41, 71, + 68, 121, 24, 62, 62, 124, 24, 21, 29, 33, 31, 26, 21, 23, 29, 19, + 26, 16, 8, 5, 3, 18, 18, 20, 15, 7, 11, 25, 13, 3, 14, 17, + 69, 28, 64, 62, 62, 62, 50, 60, 62, 62, 62, 44, 35, 30, 27, 44, + 40, 75, 27, 30, 16, 45, 26, 26, 33, 39, 40, 48, 44, 18, 93, 6, + 5, 22, 124, 112, 79, 3, 10, 4, 83, 87, 92, 90, 123, 97, 8, 65, + 69, 75, 82, 86, 101, 96, 122, 80, 16, 3, 65, 69, 73, 77, 81, 90, + 105, 78, 66, 73, 6, 5, 76, 81, 80, 97, 79, 26, 3, 6, 5, 71, + 74, 84, 81, 85, 62, 124, 123, 116, 123, 111, 114, 114, 113, 110, 114, 116, + 119, 92, 100, 97, 104, 120, 91, 4, 85, 92, 89, 76, 71, 72, 64, 80, + 64, 5, 67, 68, 65, 68, 77, 64, 68, 77, 8, 4, 65, 8, 18, 3, + 70, 75, 83, 71, 64, 68, 7, 7, 69, 65, 73, 9, 9, 75, 80, 76, + 76, 18, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 48, 62, 62, 62, 62, 62, 61, 45, 58, 58, 43, 23, 16, 8, 79, 62, + 62, 62, 58, 44, 45, 35, 37, 44, 41, 32, 18, 31, 13, 64, 30, 35, + 35, 37, 44, 30, 31, 20, 19, 1, 10, 2, 78, 121, 8, 5, 64, 28, + 11, 0, 77, 73, 70, 70, 72, 69, 2, 69, 72, 70, 65, 11, 6, 13, + 66, 62, 62, 62, 60, 52, 48, 36, 22, 66, 69, 27, 20, 16, 9, 6, + 1, 65, 67, 77, 77, 73, 70, 70, 72, 69, 2, 69, 72, 70, 65, 11, + 6, 13, 66, 62, 62, 62, 60, 52, 48, 36, 22, 66, 75, 65, 4, 66, + 66, 101, 101}, + + { + + 62, 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 42, 59, 61, 14, 2, + 93, 43, 84, 97, 6, 5, 69, 4, 20, 50, 58, 53, 15, 99, 121, 112, + 73, 84, 97, 6, 91, 85, 21, 8, 86, 100, 112, 3, 2, 84, 97, 0, + 84, 103, 0, 76, 89, 100, 8, 78, 74, 94, 15, 3, 22, 0, 0, 0, + 81, 86, 97, 70, 20, 1, 28, 86, 123, 120, 87, 92, 79, 81, 86, 87, + 121, 73, 99, 87, 120, 82, 76, 84, 78, 10, 2, 80, 120, 78, 101, 88, + 121, 65, 78, 72, 91, 9, 68, 69, 95, 85, 85, 81, 71, 66, 70, 86, + 67, 5, 66, 6, 70, 70, 5, 73, 20, 68, 1, 14, 17, 23, 23, 12, + 77, 76, 80, 10, 67, 89, 67, 69, 74, 78, 79, 3, 35, 4, 88, 76, + 66, 72, 
29, 37, 93, 65, 67, 78, 64, 1, 67, 77, 33, 56, 41, 70, + 69, 119, 23, 62, 62, 122, 24, 21, 28, 32, 31, 25, 20, 23, 29, 18, + 25, 16, 8, 5, 2, 18, 17, 19, 14, 7, 11, 24, 13, 2, 14, 16, + 69, 27, 64, 62, 62, 61, 49, 58, 62, 62, 62, 43, 33, 28, 26, 42, + 38, 77, 26, 29, 14, 44, 25, 25, 32, 38, 38, 46, 42, 17, 93, 5, + 4, 21, 122, 110, 77, 3, 10, 4, 82, 86, 91, 89, 121, 96, 9, 64, + 68, 75, 81, 85, 99, 95, 120, 80, 17, 4, 64, 68, 72, 77, 81, 89, + 104, 78, 64, 72, 6, 5, 75, 81, 80, 96, 78, 27, 4, 7, 5, 70, + 74, 83, 81, 85, 62, 122, 122, 115, 121, 110, 112, 113, 112, 108, 112, 114, + 117, 92, 99, 97, 103, 117, 91, 3, 85, 91, 88, 76, 71, 72, 64, 79, + 64, 4, 67, 68, 65, 68, 77, 64, 68, 77, 7, 4, 65, 7, 17, 3, + 70, 75, 82, 72, 64, 67, 6, 7, 69, 65, 72, 9, 8, 74, 79, 76, + 76, 17, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 46, 62, 62, 62, 62, 62, 59, 43, 56, 55, 41, 22, 15, 7, 79, 62, + 62, 62, 56, 42, 43, 34, 35, 42, 39, 30, 16, 29, 11, 65, 29, 34, + 33, 36, 42, 29, 29, 18, 17, 0, 9, 1, 78, 120, 7, 3, 65, 27, + 10, 64, 77, 72, 70, 70, 71, 68, 3, 69, 71, 69, 64, 12, 7, 13, + 65, 62, 62, 62, 58, 50, 46, 34, 20, 67, 69, 28, 21, 17, 9, 7, + 2, 65, 66, 77, 77, 72, 70, 70, 71, 68, 3, 69, 71, 69, 64, 12, + 7, 13, 65, 62, 62, 62, 58, 50, 46, 34, 20, 67, 75, 65, 4, 65, + 65, 99, 99}, + + { + + 62, 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 40, 57, 60, 15, 2, + 92, 41, 84, 96, 5, 5, 68, 3, 18, 48, 56, 50, 12, 100, 119, 111, + 73, 84, 96, 5, 91, 84, 21, 7, 86, 99, 110, 2, 0, 85, 97, 0, + 83, 102, 64, 76, 89, 100, 8, 78, 74, 94, 15, 3, 22, 0, 0, 0, + 80, 87, 97, 70, 19, 1, 28, 85, 122, 118, 86, 91, 77, 79, 86, 86, + 119, 72, 98, 86, 117, 82, 77, 84, 79, 10, 1, 79, 117, 77, 101, 88, + 119, 65, 78, 72, 91, 9, 68, 69, 94, 85, 85, 80, 71, 66, 70, 85, + 66, 5, 67, 5, 70, 70, 5, 73, 20, 68, 1, 14, 17, 23, 23, 12, + 78, 75, 80, 9, 67, 88, 67, 68, 73, 78, 77, 5, 36, 6, 86, 77, + 67, 72, 30, 37, 94, 65, 67, 79, 0, 1, 67, 76, 33, 56, 41, 68, + 70, 118, 22, 62, 62, 121, 23, 21, 28, 
32, 30, 25, 20, 23, 28, 17, + 24, 15, 8, 5, 2, 17, 17, 18, 14, 6, 10, 23, 12, 1, 13, 15, + 69, 25, 65, 62, 62, 59, 47, 57, 62, 62, 62, 42, 31, 25, 24, 40, + 36, 78, 24, 28, 13, 43, 24, 24, 30, 36, 36, 44, 41, 15, 93, 4, + 3, 19, 121, 109, 76, 4, 10, 4, 81, 85, 90, 89, 119, 94, 10, 64, + 68, 74, 79, 84, 98, 94, 117, 79, 17, 4, 64, 68, 71, 76, 80, 89, + 103, 78, 0, 71, 6, 5, 74, 80, 80, 95, 77, 27, 5, 7, 5, 69, + 73, 82, 80, 84, 62, 121, 120, 113, 120, 109, 111, 111, 110, 107, 111, 112, + 114, 91, 98, 96, 102, 114, 90, 2, 84, 90, 88, 76, 71, 72, 65, 79, + 65, 3, 67, 68, 64, 68, 77, 64, 68, 76, 7, 3, 65, 6, 16, 2, + 70, 75, 81, 73, 65, 67, 6, 6, 69, 65, 72, 8, 8, 74, 79, 76, + 76, 15, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 44, 62, 62, 62, 62, 62, 57, 41, 54, 53, 39, 20, 14, 6, 79, 62, + 62, 62, 54, 40, 41, 32, 33, 40, 37, 28, 14, 26, 10, 67, 28, 33, + 30, 34, 41, 27, 27, 17, 16, 64, 8, 0, 78, 119, 5, 2, 67, 25, + 9, 65, 77, 72, 69, 69, 70, 68, 3, 68, 70, 68, 0, 12, 8, 14, + 65, 62, 62, 60, 56, 48, 44, 31, 18, 69, 68, 28, 21, 17, 10, 7, + 2, 64, 66, 76, 77, 72, 69, 69, 70, 68, 3, 68, 70, 68, 0, 12, + 8, 14, 65, 62, 62, 60, 56, 48, 44, 31, 18, 69, 75, 65, 4, 65, + 65, 98, 97}, + + { + + 62, 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 38, 56, 59, 16, 2, + 90, 39, 83, 94, 5, 5, 67, 2, 17, 47, 54, 47, 10, 100, 117, 110, + 73, 83, 94, 5, 91, 83, 21, 7, 85, 98, 109, 1, 64, 85, 97, 0, + 83, 101, 64, 76, 89, 100, 8, 77, 74, 93, 16, 3, 22, 0, 0, 0, + 80, 87, 97, 69, 18, 1, 27, 85, 120, 115, 85, 90, 76, 78, 85, 85, + 117, 71, 97, 85, 115, 83, 77, 84, 79, 10, 1, 78, 115, 77, 100, 87, + 117, 65, 78, 72, 90, 9, 68, 68, 93, 84, 84, 80, 71, 65, 69, 84, + 66, 5, 67, 5, 69, 70, 5, 73, 21, 68, 1, 15, 18, 23, 23, 12, + 78, 75, 79, 9, 67, 88, 67, 67, 73, 77, 76, 6, 37, 7, 84, 77, + 68, 71, 30, 37, 95, 65, 66, 79, 1, 1, 67, 74, 33, 56, 41, 67, + 71, 116, 21, 62, 62, 120, 23, 21, 27, 31, 30, 25, 19, 23, 28, 16, + 23, 15, 8, 5, 2, 17, 16, 17, 13, 6, 10, 22, 12, 0, 12, 15, + 69, 
24, 65, 62, 62, 58, 46, 55, 62, 62, 62, 41, 29, 23, 23, 38, + 34, 79, 23, 27, 11, 42, 23, 23, 29, 35, 34, 42, 39, 14, 93, 3, + 2, 17, 119, 107, 75, 4, 10, 4, 80, 84, 89, 88, 117, 93, 11, 0, + 67, 73, 78, 83, 96, 93, 115, 78, 18, 5, 0, 67, 70, 75, 80, 88, + 102, 77, 1, 70, 6, 5, 73, 80, 79, 94, 76, 27, 6, 7, 5, 68, + 72, 81, 80, 83, 62, 120, 119, 112, 118, 108, 109, 110, 108, 105, 109, 110, + 112, 90, 97, 95, 101, 111, 90, 1, 84, 89, 87, 76, 71, 72, 65, 78, + 65, 2, 67, 68, 0, 68, 77, 64, 68, 76, 6, 3, 65, 5, 15, 2, + 70, 75, 80, 73, 65, 66, 5, 6, 69, 65, 72, 8, 7, 74, 78, 76, + 76, 14, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 42, 62, 62, 62, 62, 62, 55, 40, 52, 50, 37, 19, 13, 5, 79, 62, + 62, 62, 52, 38, 39, 31, 31, 38, 35, 26, 12, 24, 8, 68, 27, 32, + 28, 33, 39, 26, 25, 16, 15, 65, 7, 64, 78, 118, 4, 1, 68, 24, + 8, 66, 77, 71, 69, 68, 69, 67, 4, 68, 69, 67, 1, 13, 9, 14, + 64, 62, 62, 58, 54, 46, 42, 29, 16, 70, 68, 29, 22, 18, 11, 8, + 3, 64, 66, 75, 77, 71, 69, 68, 69, 67, 4, 68, 69, 67, 1, 13, + 9, 14, 64, 62, 62, 58, 54, 46, 42, 29, 16, 70, 75, 65, 4, 65, + 65, 96, 95}, + + { + + 62, 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 37, 54, 58, 16, 3, + 88, 38, 83, 93, 5, 4, 66, 1, 16, 46, 53, 43, 8, 101, 115, 108, + 73, 83, 93, 5, 90, 82, 22, 7, 84, 97, 108, 64, 65, 85, 98, 0, + 83, 101, 64, 77, 88, 100, 7, 77, 74, 92, 16, 3, 22, 0, 0, 0, + 79, 87, 97, 69, 18, 0, 27, 84, 119, 113, 84, 89, 74, 76, 84, 84, + 115, 70, 96, 85, 113, 84, 77, 84, 79, 11, 1, 77, 113, 77, 99, 86, + 115, 65, 78, 72, 90, 10, 69, 68, 93, 84, 83, 80, 70, 65, 69, 83, + 65, 5, 67, 5, 69, 70, 5, 73, 21, 68, 1, 15, 18, 24, 24, 13, + 79, 74, 78, 8, 67, 88, 67, 66, 73, 77, 75, 7, 37, 9, 83, 78, + 69, 71, 30, 37, 95, 66, 66, 80, 1, 0, 66, 73, 33, 56, 42, 66, + 72, 115, 20, 62, 62, 118, 23, 21, 27, 30, 29, 24, 19, 22, 27, 16, + 23, 15, 7, 5, 1, 16, 15, 16, 13, 6, 10, 22, 11, 65, 12, 14, + 69, 23, 66, 62, 62, 56, 44, 53, 62, 62, 62, 39, 27, 21, 21, 36, + 32, 81, 22, 25, 9, 40, 22, 22, 28, 
33, 32, 40, 37, 12, 93, 2, + 1, 16, 118, 106, 73, 5, 10, 4, 79, 84, 89, 87, 116, 92, 12, 1, + 66, 73, 77, 82, 95, 92, 113, 78, 18, 5, 0, 67, 69, 75, 79, 87, + 101, 77, 3, 69, 6, 5, 73, 79, 79, 94, 76, 28, 6, 8, 5, 67, + 72, 81, 79, 83, 62, 118, 117, 110, 116, 106, 108, 108, 107, 104, 107, 108, + 110, 90, 96, 95, 101, 108, 90, 0, 84, 89, 86, 76, 71, 72, 65, 78, + 65, 1, 67, 68, 0, 68, 77, 64, 68, 76, 6, 3, 65, 4, 14, 2, + 70, 75, 79, 74, 65, 65, 4, 6, 69, 65, 71, 8, 7, 73, 77, 76, + 76, 12, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 40, 62, 62, 62, 62, 62, 52, 38, 50, 48, 35, 18, 12, 4, 79, 62, + 62, 62, 50, 36, 38, 29, 29, 36, 32, 24, 10, 22, 6, 69, 26, 30, + 26, 31, 37, 24, 23, 14, 13, 66, 6, 65, 79, 117, 3, 64, 70, 23, + 6, 67, 76, 71, 68, 68, 68, 66, 5, 68, 68, 66, 2, 13, 10, 15, + 0, 62, 62, 56, 52, 44, 40, 27, 14, 71, 67, 29, 22, 19, 11, 9, + 3, 0, 65, 75, 76, 71, 68, 68, 68, 66, 5, 68, 68, 66, 2, 13, + 10, 15, 0, 62, 62, 56, 52, 44, 40, 27, 14, 71, 75, 65, 4, 64, + 64, 95, 92}, + + { + + 62, 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 35, 53, 57, 17, 3, + 87, 36, 83, 91, 4, 4, 65, 0, 15, 45, 51, 40, 5, 102, 113, 107, + 73, 83, 91, 4, 90, 81, 22, 7, 84, 96, 106, 65, 66, 85, 98, 0, + 82, 100, 65, 77, 88, 100, 7, 77, 74, 91, 17, 3, 22, 0, 0, 0, + 79, 87, 97, 68, 17, 0, 26, 84, 118, 111, 83, 88, 73, 75, 83, 83, + 113, 69, 95, 84, 110, 84, 78, 84, 80, 11, 1, 76, 110, 76, 99, 86, + 113, 65, 78, 72, 90, 10, 69, 67, 92, 84, 82, 79, 70, 65, 68, 82, + 65, 5, 68, 5, 69, 70, 5, 73, 21, 68, 1, 16, 18, 24, 24, 13, + 79, 74, 78, 8, 67, 87, 67, 65, 72, 76, 73, 9, 38, 10, 81, 78, + 70, 70, 31, 37, 96, 66, 65, 80, 2, 0, 66, 72, 33, 56, 42, 64, + 73, 113, 19, 62, 62, 117, 23, 21, 26, 30, 29, 24, 18, 22, 27, 15, + 22, 15, 7, 5, 1, 16, 15, 15, 12, 6, 10, 21, 11, 66, 11, 13, + 69, 22, 66, 62, 62, 54, 43, 52, 62, 62, 62, 38, 25, 19, 20, 34, + 30, 82, 21, 24, 8, 39, 21, 21, 26, 32, 30, 38, 36, 11, 93, 1, + 0, 14, 116, 104, 72, 5, 10, 4, 78, 83, 88, 87, 114, 90, 13, 2, + 66, 
72, 75, 81, 93, 91, 110, 77, 19, 6, 1, 66, 68, 74, 79, 86, + 100, 77, 4, 68, 6, 5, 72, 79, 79, 93, 75, 28, 7, 8, 5, 66, + 71, 80, 79, 82, 62, 117, 116, 109, 115, 105, 106, 107, 105, 102, 105, 106, + 107, 89, 95, 94, 100, 105, 89, 64, 83, 88, 85, 76, 71, 72, 65, 77, + 66, 0, 67, 68, 1, 68, 77, 64, 68, 75, 5, 2, 65, 3, 13, 1, + 70, 75, 78, 75, 66, 64, 4, 5, 69, 65, 71, 7, 6, 73, 77, 76, + 76, 11, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 38, 62, 62, 62, 62, 62, 50, 36, 48, 45, 33, 17, 11, 3, 79, 62, + 61, 62, 48, 34, 36, 28, 27, 34, 30, 22, 8, 20, 5, 71, 25, 29, + 24, 30, 36, 23, 21, 13, 12, 67, 5, 66, 79, 116, 1, 65, 71, 21, + 5, 68, 76, 70, 68, 67, 67, 65, 5, 67, 67, 65, 3, 14, 11, 15, + 0, 62, 60, 54, 50, 42, 38, 24, 12, 72, 67, 30, 23, 19, 12, 10, + 4, 0, 65, 74, 76, 70, 68, 67, 67, 65, 5, 67, 67, 65, 3, 14, + 11, 15, 0, 62, 60, 54, 50, 42, 38, 24, 12, 72, 75, 65, 4, 64, + 64, 93, 90}, + + { + + 62, 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 33, 51, 56, 17, 3, + 85, 34, 83, 90, 4, 3, 64, 64, 13, 43, 49, 36, 3, 103, 111, 106, + 73, 83, 90, 4, 90, 81, 22, 6, 83, 95, 105, 66, 68, 86, 99, 0, + 82, 99, 65, 78, 88, 100, 7, 77, 74, 91, 17, 3, 22, 0, 0, 0, + 78, 88, 97, 68, 16, 0, 26, 83, 117, 109, 82, 88, 71, 73, 83, 82, + 111, 69, 94, 83, 108, 85, 78, 85, 80, 11, 0, 76, 108, 76, 98, 85, + 112, 65, 78, 72, 90, 10, 69, 67, 91, 84, 82, 79, 70, 65, 68, 81, + 64, 5, 68, 4, 69, 70, 4, 73, 21, 68, 1, 16, 18, 24, 24, 13, + 80, 73, 77, 7, 67, 87, 67, 64, 72, 76, 72, 10, 39, 12, 79, 79, + 71, 70, 31, 37, 97, 66, 65, 81, 2, 0, 66, 71, 33, 56, 42, 0, + 74, 112, 18, 59, 62, 116, 22, 21, 26, 29, 28, 23, 18, 22, 26, 14, + 21, 14, 7, 4, 0, 15, 14, 14, 12, 5, 9, 20, 10, 67, 10, 12, + 69, 20, 67, 62, 62, 52, 41, 50, 60, 62, 62, 37, 23, 16, 18, 31, + 28, 84, 19, 23, 6, 38, 20, 20, 25, 30, 28, 36, 34, 9, 93, 0, + 64, 12, 115, 103, 71, 6, 10, 4, 78, 82, 87, 86, 112, 89, 13, 2, + 65, 72, 74, 80, 92, 90, 108, 77, 19, 6, 1, 66, 68, 74, 78, 86, + 99, 77, 5, 67, 6, 5, 71, 78, 79, 92, 
74, 28, 8, 8, 5, 65, + 71, 79, 78, 82, 62, 116, 114, 107, 113, 104, 105, 105, 104, 101, 104, 104, + 105, 89, 94, 94, 99, 102, 89, 65, 83, 87, 85, 76, 71, 72, 66, 77, + 66, 64, 67, 68, 1, 68, 77, 65, 68, 75, 5, 2, 66, 2, 12, 1, + 71, 75, 77, 76, 66, 64, 3, 5, 69, 66, 71, 7, 6, 73, 76, 76, + 76, 9, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, + 36, 62, 62, 62, 62, 61, 48, 34, 45, 43, 31, 15, 9, 2, 79, 61, + 59, 62, 46, 31, 34, 26, 24, 32, 28, 20, 6, 17, 3, 72, 23, 28, + 21, 28, 34, 21, 19, 11, 10, 68, 4, 67, 79, 115, 0, 67, 73, 20, + 4, 69, 76, 70, 67, 67, 66, 65, 6, 67, 66, 65, 4, 14, 11, 16, + 1, 61, 58, 52, 48, 40, 36, 22, 10, 74, 66, 30, 23, 20, 12, 10, + 4, 1, 65, 74, 76, 70, 67, 67, 66, 65, 6, 67, 66, 65, 4, 14, + 11, 16, 1, 61, 58, 52, 48, 40, 36, 22, 10, 74, 75, 66, 3, 64, + 64, 92, 88}, + + { + + 62, 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 31, 49, 56, 18, 3, + 83, 33, 82, 88, 4, 3, 0, 64, 12, 42, 47, 33, 1, 103, 109, 104, + 72, 82, 88, 4, 89, 80, 23, 6, 82, 94, 104, 67, 69, 86, 99, 0, + 82, 98, 65, 78, 88, 100, 7, 76, 73, 90, 17, 3, 22, 0, 0, 0, + 77, 88, 97, 68, 15, 0, 26, 82, 115, 106, 81, 87, 69, 71, 82, 81, + 109, 68, 92, 82, 106, 86, 78, 85, 80, 12, 0, 75, 106, 76, 97, 84, + 110, 65, 77, 72, 89, 11, 69, 66, 90, 83, 81, 79, 70, 64, 67, 80, + 0, 5, 68, 4, 68, 69, 4, 73, 22, 68, 1, 16, 19, 25, 24, 14, + 80, 72, 76, 6, 67, 87, 67, 0, 72, 75, 71, 11, 40, 14, 77, 80, + 72, 69, 31, 38, 98, 66, 65, 81, 3, 0, 66, 69, 33, 56, 42, 1, + 75, 111, 17, 57, 62, 114, 22, 21, 26, 28, 28, 23, 18, 22, 26, 13, + 20, 14, 7, 4, 0, 15, 13, 14, 12, 5, 9, 19, 9, 68, 10, 12, + 69, 19, 67, 62, 62, 51, 40, 48, 58, 62, 62, 36, 21, 14, 17, 29, + 27, 85, 18, 22, 4, 37, 19, 19, 24, 28, 27, 34, 32, 8, 93, 0, + 65, 11, 113, 101, 69, 7, 10, 4, 77, 81, 86, 85, 110, 88, 14, 3, + 64, 71, 73, 79, 91, 89, 106, 76, 20, 7, 2, 66, 67, 73, 77, 85, + 97, 76, 7, 66, 7, 5, 70, 77, 78, 91, 73, 29, 9, 9, 6, 64, + 70, 78, 77, 81, 62, 114, 112, 105, 111, 103, 104, 103, 102, 99, 102, 102, + 103, 
88, 93, 93, 98, 98, 89, 66, 83, 86, 84, 75, 71, 72, 66, 77, + 66, 65, 67, 68, 2, 68, 77, 65, 68, 75, 5, 2, 66, 2, 11, 1, + 71, 74, 75, 76, 66, 0, 2, 5, 69, 66, 70, 7, 6, 72, 75, 75, + 75, 7, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 58, + 34, 62, 62, 62, 62, 58, 46, 33, 43, 41, 30, 14, 8, 1, 79, 59, + 57, 60, 44, 29, 32, 25, 22, 30, 26, 18, 4, 15, 1, 73, 22, 27, + 19, 27, 32, 20, 17, 10, 9, 69, 3, 67, 79, 114, 64, 68, 75, 19, + 3, 70, 76, 69, 66, 66, 64, 64, 7, 67, 65, 64, 5, 15, 12, 17, + 2, 60, 57, 50, 46, 38, 34, 20, 8, 75, 65, 30, 24, 21, 13, 11, + 5, 2, 64, 73, 76, 69, 66, 66, 64, 64, 7, 67, 65, 64, 5, 15, + 12, 17, 2, 60, 57, 50, 46, 38, 34, 20, 8, 75, 75, 66, 3, 0, + 0, 91, 86}, + + { + + 62, 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 29, 48, 55, 19, 3, + 82, 31, 82, 87, 3, 3, 1, 65, 11, 41, 45, 30, 65, 104, 107, 103, + 72, 82, 87, 3, 89, 79, 23, 6, 82, 93, 102, 68, 70, 86, 99, 0, + 81, 97, 66, 78, 88, 100, 7, 76, 73, 89, 18, 3, 22, 0, 0, 0, + 77, 88, 97, 67, 14, 0, 25, 82, 114, 104, 80, 86, 68, 70, 81, 80, + 107, 67, 91, 81, 103, 86, 79, 85, 81, 12, 0, 74, 103, 75, 97, 84, + 108, 65, 77, 72, 89, 11, 69, 66, 89, 83, 80, 78, 70, 64, 67, 79, + 0, 5, 69, 4, 68, 69, 4, 73, 22, 68, 1, 17, 19, 25, 24, 14, + 81, 72, 76, 6, 67, 86, 67, 1, 71, 75, 69, 13, 41, 15, 75, 80, + 73, 69, 32, 38, 99, 66, 64, 82, 4, 0, 66, 68, 33, 56, 42, 3, + 76, 109, 16, 54, 62, 113, 22, 21, 25, 28, 27, 23, 17, 22, 25, 12, + 19, 14, 7, 4, 0, 14, 13, 13, 11, 5, 9, 18, 9, 69, 9, 11, + 69, 18, 68, 60, 62, 49, 38, 47, 56, 62, 62, 35, 19, 12, 15, 27, + 25, 86, 17, 21, 3, 36, 18, 18, 22, 27, 25, 32, 31, 6, 93, 64, + 66, 9, 112, 100, 68, 7, 10, 4, 76, 80, 85, 85, 108, 86, 15, 4, + 64, 70, 71, 78, 89, 88, 103, 75, 20, 7, 2, 65, 66, 72, 77, 84, + 96, 76, 8, 65, 7, 5, 69, 77, 78, 90, 72, 29, 10, 9, 6, 0, + 69, 77, 77, 80, 62, 113, 111, 104, 110, 102, 102, 102, 100, 98, 100, 100, + 100, 87, 92, 92, 97, 95, 88, 67, 82, 85, 83, 75, 71, 72, 66, 76, + 67, 66, 67, 68, 3, 68, 77, 65, 68, 74, 4, 1, 66, 
1, 10, 0, + 71, 74, 74, 77, 67, 1, 2, 4, 69, 66, 70, 6, 5, 72, 75, 75, + 75, 6, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 56, + 32, 62, 62, 62, 62, 55, 44, 31, 41, 38, 28, 13, 7, 0, 79, 57, + 54, 57, 42, 27, 30, 23, 20, 28, 24, 16, 2, 13, 0, 75, 21, 26, + 17, 25, 31, 18, 15, 9, 8, 70, 2, 68, 79, 113, 66, 69, 76, 17, + 2, 71, 76, 69, 66, 65, 0, 0, 7, 66, 64, 0, 6, 15, 13, 17, + 2, 60, 55, 48, 44, 36, 32, 17, 6, 76, 65, 31, 24, 21, 14, 12, + 5, 2, 64, 72, 76, 69, 66, 65, 0, 0, 7, 66, 64, 0, 6, 15, + 13, 17, 2, 60, 55, 48, 44, 36, 32, 17, 6, 76, 75, 66, 3, 0, + 0, 89, 84}, + + { + + 62, 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 28, 46, 54, 19, 4, + 80, 30, 82, 85, 3, 2, 2, 66, 10, 40, 44, 26, 67, 105, 105, 101, + 72, 82, 85, 3, 88, 78, 24, 6, 81, 92, 101, 70, 71, 86, 100, 0, + 81, 97, 66, 79, 87, 100, 6, 76, 73, 88, 18, 3, 22, 0, 0, 0, + 76, 88, 97, 67, 14, 64, 25, 81, 113, 102, 79, 85, 66, 68, 80, 79, + 105, 66, 90, 81, 101, 87, 79, 85, 81, 13, 0, 73, 101, 75, 96, 83, + 106, 65, 77, 72, 89, 12, 70, 65, 89, 83, 79, 78, 69, 64, 66, 78, + 1, 5, 69, 4, 68, 69, 4, 73, 22, 68, 1, 17, 19, 26, 25, 15, + 81, 71, 75, 5, 67, 86, 67, 2, 71, 74, 68, 14, 41, 17, 74, 81, + 74, 68, 32, 38, 99, 67, 64, 82, 4, 64, 65, 67, 33, 56, 43, 4, + 77, 108, 15, 51, 62, 111, 22, 21, 25, 27, 27, 22, 17, 21, 25, 12, + 19, 14, 6, 4, 64, 14, 12, 12, 11, 5, 9, 18, 8, 71, 9, 10, + 69, 17, 68, 57, 62, 47, 37, 45, 54, 62, 61, 33, 17, 10, 14, 25, + 23, 88, 16, 19, 1, 34, 17, 17, 21, 25, 23, 30, 29, 5, 93, 65, + 67, 8, 110, 98, 66, 8, 10, 4, 75, 80, 85, 84, 107, 85, 16, 5, + 0, 70, 70, 77, 88, 87, 101, 75, 21, 8, 3, 65, 65, 72, 76, 83, + 95, 76, 10, 64, 7, 5, 69, 76, 78, 90, 72, 30, 10, 10, 6, 1, + 69, 77, 76, 80, 62, 111, 109, 102, 108, 100, 101, 100, 99, 96, 98, 98, + 98, 87, 91, 92, 97, 92, 88, 68, 82, 85, 82, 75, 71, 72, 66, 76, + 67, 67, 67, 68, 3, 68, 77, 65, 68, 74, 4, 1, 66, 0, 9, 0, + 71, 74, 73, 78, 67, 2, 1, 4, 69, 66, 69, 6, 5, 71, 74, 75, + 75, 4, 62, 61, 62, 62, 62, 62, 62, 62, 62, 62, 
62, 62, 62, 53, + 30, 62, 62, 62, 62, 53, 41, 29, 39, 36, 26, 12, 6, 64, 79, 55, + 52, 55, 40, 25, 29, 22, 18, 26, 21, 14, 0, 11, 65, 76, 20, 24, + 15, 24, 29, 17, 13, 7, 6, 71, 1, 69, 80, 112, 67, 71, 78, 16, + 0, 72, 75, 68, 65, 65, 1, 1, 8, 66, 0, 1, 7, 16, 14, 18, + 3, 59, 53, 46, 42, 34, 30, 15, 4, 77, 64, 31, 25, 22, 14, 13, + 6, 3, 0, 72, 75, 68, 65, 65, 1, 1, 8, 66, 0, 1, 7, 16, + 14, 18, 3, 59, 53, 46, 42, 34, 30, 15, 4, 77, 75, 66, 3, 1, + 1, 88, 81}, + + { + + 62, 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 26, 45, 53, 20, 4, 78, + 28, 82, 84, 3, 2, 3, 67, 8, 38, 42, 23, 69, 106, 103, 100, 72, 82, + 84, 3, 88, 77, 24, 5, 80, 91, 100, 71, 73, 87, 100, 0, 81, 96, 66, + 79, 87, 100, 6, 76, 73, 88, 19, 3, 22, 0, 0, 0, 76, 89, 97, 66, + 13, 64, 24, 81, 112, 100, 78, 84, 65, 67, 80, 78, 103, 65, 89, 80, 99, + 88, 79, 85, 81, 13, 64, 72, 99, 75, 95, 82, 104, 65, 77, 72, 89, 12, + 70, 65, 88, 83, 79, 78, 69, 64, 66, 77, 1, 5, 69, 3, 68, 69, 4, + 73, 22, 68, 1, 18, 19, 26, 25, 15, 82, 71, 74, 5, 67, 86, 67, 3, + 71, 74, 67, 15, 42, 18, 72, 81, 75, 68, 32, 38, 100, 67, 0, 83, 5, + 64, 65, 66, 33, 56, 43, 5, 78, 106, 14, 48, 60, 110, 21, 21, 24, 26, + 26, 22, 16, 21, 24, 11, 18, 13, 6, 4, 64, 13, 11, 11, 10, 4, 8, + 17, 8, 72, 8, 9, 69, 15, 69, 55, 62, 45, 35, 43, 52, 62, 58, 32, + 15, 7, 12, 23, 21, 89, 14, 18, 64, 33, 16, 16, 20, 24, 21, 28, 27, + 3, 93, 66, 68, 6, 109, 97, 65, 8, 10, 4, 74, 79, 84, 83, 105, 84, + 17, 5, 1, 69, 69, 76, 86, 86, 99, 74, 21, 8, 3, 64, 64, 71, 76, + 83, 94, 76, 11, 0, 7, 5, 68, 76, 78, 89, 71, 30, 11, 10, 6, 2, + 68, 76, 76, 79, 62, 110, 108, 101, 106, 99, 99, 99, 97, 95, 97, 96, 96, + 86, 90, 91, 96, 89, 88, 69, 82, 84, 82, 75, 71, 72, 67, 75, 67, 68, + 67, 68, 4, 68, 77, 65, 68, 74, 3, 1, 66, 64, 8, 0, 71, 74, 72, + 79, 67, 2, 0, 4, 69, 66, 69, 6, 4, 71, 73, 75, 75, 3, 62, 60, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 50, 28, 62, 62, 62, 62, + 50, 39, 27, 37, 33, 24, 10, 5, 65, 79, 52, 50, 53, 38, 23, 27, 20, + 16, 24, 19, 12, 65, 8, 
67, 77, 19, 23, 12, 22, 27, 15, 11, 6, 5, + 72, 0, 70, 80, 111, 68, 72, 79, 15, 64, 73, 75, 68, 65, 64, 2, 1, + 9, 66, 1, 2, 8, 16, 15, 18, 4, 59, 51, 44, 40, 32, 28, 13, 2, + 79, 64, 32, 25, 23, 15, 13, 6, 3, 0, 71, 75, 68, 65, 64, 2, 1, + 9, 66, 1, 2, 8, 16, 15, 18, 4, 59, 51, 44, 40, 32, 28, 13, 2, + 79, 75, 66, 3, 1, 1, 86, 79}, + + { + + 62, 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 24, 43, 52, 21, 4, 77, + 26, 81, 82, 2, 2, 4, 68, 7, 37, 40, 20, 72, 106, 101, 99, 72, 81, + 82, 2, 88, 76, 24, 5, 80, 90, 98, 72, 74, 87, 100, 0, 80, 95, 67, + 79, 87, 100, 6, 75, 73, 87, 19, 3, 22, 0, 0, 0, 75, 89, 97, 66, + 12, 64, 24, 80, 110, 97, 77, 83, 0, 65, 79, 77, 101, 64, 88, 79, 96, + 88, 80, 85, 82, 13, 64, 71, 96, 74, 95, 82, 102, 65, 77, 72, 88, 12, + 70, 64, 87, 82, 78, 77, 69, 0, 65, 76, 2, 5, 70, 3, 67, 69, 4, + 73, 23, 68, 1, 18, 20, 26, 25, 15, 82, 70, 74, 4, 67, 85, 67, 4, + 70, 73, 65, 17, 43, 20, 70, 82, 76, 67, 33, 38, 101, 67, 0, 83, 6, + 64, 65, 64, 33, 56, 43, 7, 79, 105, 13, 46, 57, 109, 21, 21, 24, 26, + 26, 22, 16, 21, 24, 10, 17, 13, 6, 4, 64, 13, 11, 10, 10, 4, 8, + 16, 7, 73, 7, 9, 69, 14, 69, 53, 62, 44, 34, 42, 50, 62, 56, 31, + 13, 5, 11, 21, 19, 90, 13, 17, 65, 32, 15, 15, 18, 22, 19, 26, 26, + 2, 93, 67, 69, 4, 107, 95, 64, 9, 10, 4, 73, 78, 83, 83, 103, 82, + 18, 6, 1, 68, 67, 75, 85, 85, 96, 73, 22, 9, 4, 64, 0, 70, 75, + 82, 93, 75, 12, 1, 7, 5, 67, 75, 77, 88, 70, 30, 12, 10, 6, 3, + 67, 75, 75, 78, 62, 109, 106, 99, 105, 98, 98, 97, 95, 93, 95, 94, 93, + 85, 89, 90, 95, 86, 87, 70, 81, 83, 81, 75, 71, 72, 67, 75, 68, 69, + 67, 68, 5, 68, 77, 65, 68, 73, 3, 0, 66, 65, 7, 64, 71, 74, 71, + 79, 68, 3, 0, 3, 69, 66, 69, 5, 4, 71, 73, 75, 75, 1, 62, 59, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 60, 48, 26, 62, 62, 62, 62, + 47, 37, 26, 35, 31, 22, 9, 4, 66, 79, 50, 47, 50, 36, 21, 25, 19, + 14, 22, 17, 10, 67, 6, 68, 79, 18, 22, 10, 21, 26, 14, 9, 5, 4, + 73, 64, 71, 80, 110, 70, 73, 81, 13, 65, 74, 75, 67, 64, 0, 3, 2, + 9, 65, 2, 3, 9, 17, 16, 
19, 4, 58, 49, 42, 38, 30, 26, 10, 0, + 80, 0, 32, 26, 23, 16, 14, 7, 4, 0, 70, 75, 67, 64, 0, 3, 2, + 9, 65, 2, 3, 9, 17, 16, 19, 4, 58, 49, 42, 38, 30, 26, 10, 0, + 80, 75, 66, 3, 1, 1, 85, 77}, + + { + + 61, 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 22, 42, 51, 21, 4, 75, + 25, 81, 81, 2, 1, 5, 69, 6, 36, 38, 16, 74, 107, 99, 97, 72, 81, + 81, 2, 87, 75, 25, 5, 79, 89, 97, 73, 75, 87, 101, 0, 80, 94, 67, + 80, 87, 100, 6, 75, 73, 86, 20, 3, 22, 0, 0, 0, 75, 89, 97, 65, + 11, 64, 23, 80, 109, 95, 76, 82, 1, 64, 78, 76, 99, 0, 87, 78, 94, + 89, 80, 85, 82, 14, 64, 70, 94, 74, 94, 81, 100, 65, 77, 72, 88, 13, + 70, 64, 86, 82, 77, 77, 69, 0, 65, 75, 2, 5, 70, 3, 67, 69, 4, + 73, 23, 68, 1, 19, 20, 27, 25, 16, 83, 70, 73, 4, 67, 85, 67, 5, + 70, 73, 64, 18, 44, 21, 68, 82, 77, 67, 33, 38, 102, 67, 1, 84, 6, + 64, 65, 0, 33, 56, 43, 8, 80, 103, 12, 43, 54, 107, 21, 21, 23, 25, + 25, 21, 15, 21, 23, 9, 16, 13, 6, 4, 65, 12, 10, 9, 9, 4, 8, + 15, 7, 74, 7, 8, 69, 13, 70, 51, 60, 42, 32, 40, 48, 62, 53, 30, + 11, 3, 9, 19, 17, 92, 12, 16, 67, 31, 14, 14, 17, 21, 17, 24, 24, + 0, 93, 68, 70, 3, 106, 94, 1, 9, 10, 4, 72, 77, 82, 82, 101, 81, + 19, 7, 2, 68, 66, 74, 83, 84, 94, 73, 22, 9, 4, 0, 1, 70, 75, + 81, 92, 75, 14, 2, 7, 5, 66, 75, 77, 87, 69, 31, 13, 11, 6, 4, + 67, 74, 75, 78, 62, 107, 105, 98, 103, 97, 96, 96, 94, 92, 93, 92, 91, + 85, 88, 90, 94, 83, 87, 71, 81, 82, 80, 75, 71, 72, 67, 74, 68, 70, + 67, 68, 5, 68, 77, 65, 68, 73, 2, 0, 66, 66, 6, 64, 71, 74, 70, + 80, 68, 4, 64, 3, 69, 66, 68, 5, 3, 70, 72, 75, 75, 0, 62, 58, + 61, 61, 61, 62, 62, 62, 61, 62, 62, 62, 57, 45, 24, 62, 60, 59, 60, + 44, 35, 24, 33, 28, 20, 8, 3, 67, 79, 48, 45, 48, 34, 19, 23, 17, + 12, 20, 15, 8, 69, 4, 70, 80, 17, 21, 8, 19, 24, 12, 7, 3, 2, + 74, 65, 72, 80, 109, 71, 75, 82, 12, 66, 75, 75, 67, 64, 0, 4, 3, + 10, 65, 3, 4, 10, 17, 17, 19, 5, 58, 47, 40, 36, 28, 24, 8, 65, + 81, 0, 33, 26, 24, 16, 15, 7, 4, 1, 70, 75, 67, 64, 0, 4, 3, + 10, 65, 3, 4, 10, 17, 17, 19, 5, 58, 47, 40, 
36, 28, 24, 8, 65, + 81, 75, 66, 3, 2, 2, 83, 75}, + + { + + 60, 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 20, 40, 50, 22, 4, 73, 23, + 81, 79, 2, 1, 6, 70, 5, 35, 36, 13, 76, 108, 97, 96, 72, 81, 79, 2, + 87, 74, 25, 5, 78, 88, 96, 74, 76, 87, 101, 0, 80, 93, 67, 80, 87, 100, + 6, 75, 73, 85, 20, 3, 22, 0, 0, 0, 74, 89, 97, 65, 10, 64, 23, 79, + 108, 93, 75, 81, 3, 1, 77, 75, 97, 1, 86, 77, 92, 90, 80, 85, 82, 14, + 64, 69, 92, 74, 93, 80, 98, 65, 77, 72, 88, 13, 70, 0, 85, 82, 76, 77, + 69, 0, 64, 74, 3, 5, 70, 3, 67, 69, 4, 73, 23, 68, 1, 19, 20, 27, + 25, 16, 83, 69, 72, 3, 67, 85, 67, 6, 70, 72, 0, 19, 45, 23, 66, 83, + 78, 66, 33, 38, 103, 67, 1, 84, 7, 64, 65, 1, 33, 56, 43, 9, 81, 102, + 11, 40, 51, 106, 21, 21, 23, 24, 25, 21, 15, 21, 23, 8, 15, 13, 6, 4, + 65, 12, 9, 8, 9, 4, 8, 14, 6, 75, 6, 7, 69, 12, 70, 49, 58, 40, + 31, 38, 46, 59, 51, 29, 9, 1, 8, 17, 15, 93, 11, 15, 69, 30, 13, 13, + 16, 19, 15, 22, 22, 64, 93, 69, 71, 1, 104, 92, 2, 10, 10, 4, 71, 76, + 81, 81, 99, 80, 20, 8, 3, 67, 65, 73, 82, 83, 92, 72, 23, 10, 5, 0, + 2, 69, 74, 80, 91, 75, 15, 3, 7, 5, 65, 74, 77, 86, 68, 31, 14, 11, + 6, 5, 66, 73, 74, 77, 62, 106, 103, 96, 101, 96, 95, 94, 92, 90, 91, 90, + 89, 84, 87, 89, 93, 80, 87, 72, 81, 81, 79, 75, 71, 72, 67, 74, 68, 71, + 67, 68, 6, 68, 77, 65, 68, 73, 2, 0, 66, 67, 5, 64, 71, 74, 69, 81, + 68, 5, 65, 3, 69, 66, 68, 5, 3, 70, 71, 75, 75, 65, 61, 57, 60, 59, + 59, 62, 62, 62, 59, 60, 62, 61, 54, 42, 22, 61, 57, 55, 55, 41, 33, 22, + 31, 26, 18, 7, 2, 68, 79, 46, 43, 46, 32, 17, 21, 16, 10, 18, 13, 6, + 71, 2, 72, 81, 16, 20, 6, 18, 22, 11, 5, 2, 1, 75, 66, 73, 80, 108, + 72, 76, 84, 11, 67, 76, 75, 66, 0, 1, 5, 4, 11, 65, 4, 5, 11, 18, + 18, 20, 6, 57, 45, 38, 34, 26, 22, 6, 67, 82, 1, 33, 27, 25, 17, 16, + 8, 5, 1, 69, 75, 66, 0, 1, 5, 4, 11, 65, 4, 5, 11, 18, 18, 20, + 6, 57, 45, 38, 34, 26, 22, 6, 67, 82, 75, 66, 3, 2, 2, 82, 73}, + + { + + 58, 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 18, 38, 49, 22, 4, 72, 21, + 81, 78, 1, 0, 7, 71, 3, 
33, 34, 9, 79, 109, 95, 95, 72, 81, 78, 1, + 87, 74, 25, 4, 78, 88, 95, 76, 78, 88, 102, 64, 80, 93, 68, 81, 87, 100, + 5, 75, 73, 85, 20, 2, 22, 0, 0, 0, 74, 90, 97, 65, 9, 65, 22, 79, + 107, 91, 74, 81, 4, 2, 77, 74, 96, 1, 85, 77, 90, 91, 81, 86, 83, 14, + 65, 69, 90, 74, 93, 80, 97, 65, 77, 72, 88, 13, 71, 0, 85, 82, 76, 77, + 69, 0, 64, 73, 3, 5, 71, 2, 67, 69, 3, 73, 23, 68, 1, 19, 20, 27, + 25, 16, 84, 69, 72, 2, 67, 85, 68, 6, 70, 72, 1, 20, 45, 24, 65, 84, + 80, 66, 33, 38, 104, 68, 1, 85, 7, 65, 65, 2, 33, 55, 43, 10, 82, 101, + 9, 37, 47, 105, 20, 21, 22, 23, 24, 20, 14, 20, 22, 7, 14, 12, 5, 3, + 66, 11, 8, 7, 8, 3, 7, 13, 5, 77, 5, 6, 69, 10, 71, 46, 55, 38, + 29, 36, 43, 55, 48, 27, 7, 65, 6, 14, 13, 95, 9, 13, 71, 28, 12, 12, + 14, 17, 13, 20, 20, 66, 93, 70, 72, 64, 103, 91, 3, 10, 10, 4, 71, 76, + 81, 81, 98, 79, 20, 8, 3, 67, 64, 72, 81, 83, 90, 72, 23, 10, 5, 0, + 2, 69, 74, 80, 90, 75, 16, 4, 7, 4, 65, 74, 77, 86, 68, 31, 14, 11, + 6, 6, 66, 73, 74, 77, 62, 105, 102, 95, 100, 95, 94, 93, 91, 89, 90, 89, + 87, 84, 87, 89, 93, 77, 87, 74, 81, 81, 79, 75, 71, 72, 68, 74, 69, 72, + 68, 68, 6, 69, 77, 66, 68, 73, 1, 64, 67, 68, 4, 65, 72, 74, 68, 82, + 69, 5, 66, 2, 69, 67, 68, 4, 2, 70, 71, 75, 75, 67, 59, 56, 58, 57, + 56, 62, 62, 62, 56, 57, 62, 58, 50, 39, 20, 57, 53, 51, 49, 38, 30, 20, + 28, 23, 16, 5, 0, 69, 79, 43, 40, 43, 30, 14, 19, 14, 7, 16, 10, 4, + 74, 64, 74, 83, 14, 18, 3, 16, 20, 9, 3, 0, 64, 76, 67, 74, 81, 107, + 74, 78, 86, 9, 69, 78, 75, 66, 0, 1, 6, 4, 11, 65, 5, 5, 12, 18, + 18, 20, 6, 56, 43, 36, 31, 23, 20, 3, 69, 84, 1, 33, 27, 25, 17, 16, + 8, 5, 1, 69, 75, 66, 0, 1, 6, 4, 11, 65, 5, 5, 12, 18, 18, 20, + 6, 56, 43, 36, 31, 23, 20, 3, 69, 84, 75, 67, 2, 2, 2, 81, 71}, + + { + + 57, 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 17, 37, 49, 23, 5, 70, 20, + 80, 76, 1, 0, 9, 71, 2, 32, 33, 6, 81, 109, 93, 93, 71, 80, 76, 1, + 86, 73, 26, 4, 77, 87, 93, 77, 79, 88, 102, 64, 79, 92, 68, 81, 86, 99, + 5, 74, 72, 84, 21, 2, 22, 0, 0, 0, 73, 
90, 97, 64, 9, 65, 22, 78, + 105, 88, 72, 80, 6, 4, 76, 72, 94, 2, 83, 76, 87, 91, 81, 86, 83, 15, + 65, 68, 87, 73, 92, 79, 95, 65, 76, 72, 87, 14, 71, 1, 84, 81, 75, 76, + 68, 1, 0, 72, 4, 6, 71, 2, 66, 68, 3, 72, 24, 67, 1, 20, 21, 28, + 26, 17, 84, 68, 71, 2, 67, 84, 68, 7, 69, 71, 3, 22, 46, 26, 0, 84, + 81, 65, 34, 39, 104, 68, 2, 85, 8, 65, 64, 4, 33, 55, 44, 12, 83, 99, + 8, 35, 44, 103, 20, 21, 22, 23, 24, 20, 14, 20, 22, 7, 14, 12, 5, 3, + 66, 11, 8, 7, 8, 3, 7, 13, 5, 78, 5, 6, 69, 9, 71, 44, 53, 37, + 28, 35, 41, 52, 46, 26, 6, 67, 5, 12, 12, 96, 8, 12, 72, 27, 12, 12, + 13, 16, 12, 19, 19, 67, 93, 70, 72, 65, 101, 89, 5, 11, 10, 4, 70, 75, + 80, 80, 96, 77, 21, 9, 4, 66, 1, 71, 79, 82, 87, 71, 24, 11, 6, 1, + 3, 68, 73, 79, 88, 74, 18, 5, 8, 4, 64, 73, 76, 85, 67, 32, 15, 12, + 7, 7, 65, 72, 73, 76, 62, 103, 100, 93, 98, 93, 92, 91, 89, 87, 88, 87, + 84, 83, 86, 88, 92, 73, 86, 75, 80, 80, 78, 74, 71, 71, 68, 73, 69, 72, + 68, 68, 7, 69, 77, 66, 68, 72, 1, 64, 67, 68, 4, 65, 72, 73, 66, 82, + 69, 6, 66, 2, 69, 67, 67, 4, 2, 69, 70, 74, 74, 68, 58, 55, 57, 56, + 54, 60, 60, 59, 54, 55, 59, 56, 47, 37, 18, 54, 50, 48, 44, 36, 28, 19, + 26, 21, 15, 4, 64, 69, 79, 41, 38, 41, 28, 12, 18, 13, 5, 15, 8, 3, + 76, 66, 75, 84, 13, 17, 1, 15, 19, 8, 2, 64, 65, 77, 67, 74, 81, 106, + 75, 79, 87, 8, 70, 79, 74, 65, 1, 2, 8, 5, 12, 64, 7, 6, 13, 19, + 19, 21, 7, 56, 42, 35, 29, 21, 19, 1, 70, 85, 2, 34, 28, 26, 18, 17, + 9, 6, 2, 68, 74, 65, 1, 2, 8, 5, 12, 64, 7, 6, 13, 19, 19, 21, + 7, 56, 42, 35, 29, 21, 19, 1, 70, 85, 75, 67, 2, 3, 3, 79, 68}, + + { + + 56, 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 15, 35, 48, 24, 5, 68, 18, + 80, 75, 1, 0, 10, 72, 1, 31, 31, 3, 83, 110, 91, 92, 71, 80, 75, 1, + 86, 72, 26, 4, 76, 86, 92, 78, 80, 88, 102, 64, 79, 91, 68, 81, 86, 99, + 5, 74, 72, 83, 21, 2, 22, 0, 0, 0, 72, 90, 97, 64, 8, 65, 22, 77, + 104, 86, 71, 79, 8, 6, 75, 71, 92, 3, 82, 75, 85, 92, 81, 86, 83, 15, + 65, 67, 85, 73, 91, 78, 93, 65, 76, 72, 87, 14, 71, 1, 83, 
81, 74, 76, + 68, 1, 0, 71, 5, 6, 71, 2, 66, 68, 3, 72, 24, 67, 1, 20, 21, 28, + 26, 17, 85, 67, 70, 1, 67, 84, 68, 8, 69, 71, 4, 23, 47, 28, 2, 85, + 82, 65, 34, 39, 105, 68, 2, 86, 9, 65, 64, 5, 33, 55, 44, 13, 84, 98, + 7, 32, 41, 102, 20, 21, 22, 22, 23, 20, 14, 20, 21, 6, 13, 12, 5, 3, + 66, 10, 7, 6, 8, 3, 7, 12, 4, 79, 4, 5, 69, 8, 72, 42, 51, 35, + 26, 33, 39, 49, 44, 25, 4, 69, 3, 10, 10, 97, 7, 11, 74, 26, 11, 11, + 12, 14, 10, 17, 17, 69, 93, 71, 73, 67, 100, 88, 6, 12, 10, 4, 69, 74, + 79, 79, 94, 76, 22, 10, 5, 65, 2, 70, 78, 81, 85, 70, 24, 11, 6, 1, + 4, 67, 72, 78, 87, 74, 19, 6, 8, 4, 0, 72, 76, 84, 66, 32, 16, 12, + 7, 8, 64, 71, 72, 75, 62, 102, 98, 91, 96, 92, 91, 89, 87, 86, 86, 85, + 82, 82, 85, 87, 91, 70, 86, 76, 80, 79, 77, 74, 71, 71, 68, 73, 69, 73, + 68, 68, 8, 69, 77, 66, 68, 72, 1, 64, 67, 69, 3, 65, 72, 73, 65, 83, + 69, 7, 67, 2, 69, 67, 67, 4, 2, 69, 69, 74, 74, 70, 57, 54, 56, 54, + 52, 57, 57, 56, 52, 52, 56, 53, 44, 34, 16, 50, 46, 44, 39, 33, 26, 17, + 24, 19, 13, 3, 65, 70, 79, 39, 36, 39, 26, 10, 16, 11, 3, 13, 6, 1, + 78, 68, 77, 85, 12, 16, 64, 13, 17, 6, 0, 65, 66, 78, 68, 75, 81, 105, + 76, 80, 89, 7, 71, 80, 74, 65, 2, 3, 9, 6, 13, 64, 8, 7, 14, 19, + 20, 22, 8, 55, 40, 33, 27, 19, 17, 64, 72, 86, 3, 34, 28, 27, 19, 18, + 9, 7, 2, 67, 74, 65, 2, 3, 9, 6, 13, 64, 8, 7, 14, 19, 20, 22, + 8, 55, 40, 33, 27, 19, 17, 64, 72, 86, 75, 67, 2, 3, 3, 78, 66}, + + { + + 55, 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 13, 34, 47, 24, 5, 66, 17, 80, + 73, 1, 64, 11, 73, 0, 30, 29, 64, 85, 111, 89, 90, 71, 80, 73, 1, 85, 71, + 27, 4, 75, 85, 91, 79, 81, 88, 103, 64, 79, 90, 68, 82, 86, 99, 5, 74, 72, + 82, 22, 2, 22, 0, 0, 0, 72, 90, 97, 0, 7, 65, 21, 77, 103, 84, 70, 78, + 9, 7, 74, 70, 90, 4, 81, 74, 83, 93, 81, 86, 83, 16, 65, 66, 83, 73, 90, + 77, 91, 65, 76, 72, 87, 15, 71, 2, 82, 81, 73, 76, 68, 1, 1, 70, 5, 6, + 71, 2, 66, 68, 3, 72, 24, 67, 1, 21, 21, 29, 26, 18, 85, 67, 69, 1, 67, + 84, 68, 9, 69, 70, 5, 24, 48, 29, 4, 85, 83, 64, 
34, 39, 106, 68, 3, 86, + 9, 65, 64, 6, 33, 55, 44, 14, 85, 96, 6, 29, 38, 100, 20, 21, 21, 21, 23, + 19, 13, 20, 21, 5, 12, 12, 5, 3, 67, 10, 6, 5, 7, 3, 7, 11, 4, 80, + 4, 4, 69, 7, 72, 40, 49, 33, 25, 31, 37, 46, 41, 24, 2, 71, 2, 8, 8, + 99, 6, 10, 76, 25, 10, 10, 11, 13, 8, 15, 15, 70, 93, 72, 74, 68, 98, 86, + 8, 12, 10, 4, 68, 73, 78, 78, 92, 75, 23, 11, 6, 65, 3, 69, 76, 80, 83, + 70, 25, 12, 7, 2, 5, 67, 72, 77, 86, 74, 21, 7, 8, 4, 1, 72, 76, 83, + 65, 33, 17, 13, 7, 9, 64, 70, 72, 75, 62, 100, 97, 90, 94, 91, 89, 88, 86, + 84, 84, 83, 80, 82, 84, 87, 90, 67, 86, 77, 80, 78, 76, 74, 71, 71, 68, 72, + 69, 74, 68, 68, 8, 69, 77, 66, 68, 72, 0, 64, 67, 70, 2, 65, 72, 73, 64, + 84, 69, 8, 68, 2, 69, 67, 66, 4, 1, 68, 68, 74, 74, 71, 56, 53, 55, 52, + 50, 55, 55, 53, 49, 49, 53, 50, 41, 31, 14, 46, 43, 40, 34, 30, 24, 15, 22, + 16, 11, 2, 66, 71, 79, 37, 34, 37, 24, 8, 14, 10, 1, 11, 4, 64, 80, 70, + 79, 86, 11, 15, 66, 12, 15, 5, 65, 67, 68, 79, 69, 76, 81, 104, 77, 82, 90, + 6, 72, 81, 74, 64, 2, 3, 10, 7, 14, 64, 9, 8, 15, 20, 21, 22, 9, 55, + 38, 31, 25, 17, 15, 66, 74, 87, 3, 35, 29, 28, 19, 19, 10, 7, 3, 67, 74, + 64, 2, 3, 10, 7, 14, 64, 9, 8, 15, 20, 21, 22, 9, 55, 38, 31, 25, 17, + 15, 66, 74, 87, 75, 67, 2, 4, 4, 76, 64}, + + { + + 53, 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 11, 32, 46, 25, 5, 65, 15, 80, + 72, 0, 64, 12, 74, 65, 28, 27, 67, 88, 112, 87, 89, 71, 80, 72, 0, 85, 70, + 27, 3, 75, 84, 89, 80, 83, 89, 103, 64, 78, 89, 69, 82, 86, 99, 5, 74, 72, + 82, 22, 2, 22, 0, 0, 0, 71, 91, 97, 0, 6, 65, 21, 76, 102, 82, 69, 77, + 11, 9, 74, 69, 88, 5, 80, 73, 80, 93, 82, 86, 84, 16, 66, 65, 80, 72, 90, + 77, 89, 65, 76, 72, 87, 15, 71, 2, 81, 81, 73, 75, 68, 1, 1, 69, 6, 6, + 72, 1, 66, 68, 3, 72, 24, 67, 1, 21, 21, 29, 26, 18, 86, 66, 69, 0, 67, + 83, 68, 10, 68, 70, 7, 26, 49, 31, 6, 86, 84, 64, 35, 39, 107, 68, 3, 87, + 10, 65, 64, 7, 33, 55, 44, 16, 86, 95, 5, 26, 35, 99, 19, 21, 21, 21, 22, + 19, 13, 20, 20, 4, 11, 11, 5, 3, 67, 9, 6, 4, 7, 2, 6, 
10, 3, 81, + 3, 3, 69, 5, 73, 38, 47, 31, 23, 30, 35, 42, 39, 23, 0, 74, 0, 6, 6, + 100, 4, 9, 77, 24, 9, 9, 9, 11, 6, 13, 14, 72, 93, 73, 75, 70, 97, 85, + 9, 13, 10, 4, 67, 72, 77, 78, 90, 73, 24, 11, 6, 64, 5, 68, 75, 79, 80, + 69, 25, 12, 7, 2, 6, 66, 71, 77, 85, 74, 22, 8, 8, 4, 2, 71, 76, 82, + 64, 33, 18, 13, 7, 10, 0, 69, 71, 74, 62, 99, 95, 88, 93, 90, 88, 86, 84, + 83, 83, 81, 77, 81, 83, 86, 89, 64, 85, 78, 79, 77, 76, 74, 71, 71, 69, 72, + 70, 75, 68, 68, 9, 69, 77, 66, 68, 71, 0, 65, 67, 71, 1, 66, 72, 73, 0, + 85, 70, 8, 68, 1, 69, 67, 66, 3, 1, 68, 68, 74, 74, 73, 55, 52, 54, 51, + 47, 52, 52, 50, 47, 46, 49, 47, 37, 29, 12, 42, 39, 36, 29, 27, 22, 13, 20, + 14, 9, 0, 67, 72, 79, 34, 31, 34, 22, 6, 12, 8, 64, 9, 2, 66, 82, 73, + 80, 88, 10, 14, 69, 10, 14, 3, 67, 68, 69, 80, 70, 77, 81, 103, 79, 83, 92, + 4, 73, 82, 74, 64, 3, 4, 11, 7, 14, 0, 10, 9, 16, 20, 22, 23, 9, 54, + 36, 29, 23, 15, 13, 69, 76, 89, 4, 35, 29, 28, 20, 19, 10, 8, 3, 66, 74, + 64, 3, 4, 11, 7, 14, 0, 10, 9, 16, 20, 22, 23, 9, 54, 36, 29, 23, 15, + 13, 69, 76, 89, 75, 67, 2, 4, 4, 75, 1}, + + { + + 52, 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 9, 31, 45, 26, 5, 0, 13, 79, + 70, 0, 64, 13, 75, 66, 27, 25, 70, 90, 112, 85, 88, 71, 79, 70, 0, 85, 69, + 27, 3, 74, 83, 88, 81, 84, 89, 103, 64, 78, 88, 69, 82, 86, 99, 5, 73, 72, + 81, 23, 2, 22, 0, 0, 0, 71, 91, 97, 1, 5, 65, 20, 76, 100, 79, 68, 76, + 12, 10, 73, 68, 86, 6, 79, 72, 78, 94, 82, 86, 84, 16, 66, 64, 78, 72, 89, + 76, 87, 65, 76, 72, 86, 15, 71, 3, 80, 80, 72, 75, 68, 2, 2, 68, 6, 6, + 72, 1, 65, 68, 3, 72, 25, 67, 1, 22, 22, 29, 26, 18, 86, 66, 68, 0, 67, + 83, 68, 11, 68, 69, 8, 27, 50, 32, 8, 86, 85, 0, 35, 39, 108, 68, 4, 87, + 11, 65, 64, 9, 33, 55, 44, 17, 87, 93, 4, 24, 32, 98, 19, 21, 20, 20, 22, + 19, 12, 20, 20, 3, 10, 11, 5, 3, 67, 9, 5, 3, 6, 2, 6, 9, 3, 82, + 2, 3, 69, 4, 73, 36, 45, 30, 22, 28, 33, 39, 36, 22, 65, 76, 64, 4, 4, + 101, 3, 8, 79, 23, 8, 8, 8, 10, 4, 11, 12, 73, 93, 74, 76, 72, 95, 83, + 10, 13, 10, 
4, 66, 71, 76, 77, 88, 72, 25, 12, 7, 0, 6, 67, 73, 78, 78, + 68, 26, 13, 8, 3, 7, 65, 71, 76, 84, 73, 23, 9, 8, 4, 3, 71, 75, 81, + 0, 33, 19, 13, 7, 11, 1, 68, 71, 73, 62, 98, 94, 87, 91, 89, 86, 85, 82, + 81, 81, 79, 75, 80, 82, 85, 88, 2, 85, 79, 79, 76, 75, 74, 71, 71, 69, 71, + 70, 76, 68, 68, 10, 69, 77, 66, 68, 71, 64, 65, 67, 72, 0, 66, 72, 73, 1, + 85, 70, 9, 69, 1, 69, 67, 66, 3, 0, 68, 67, 74, 74, 74, 54, 51, 53, 49, + 45, 50, 49, 47, 44, 43, 46, 44, 34, 26, 10, 38, 36, 32, 24, 24, 20, 12, 18, + 11, 7, 64, 68, 73, 79, 32, 29, 32, 20, 4, 10, 7, 66, 7, 0, 68, 84, 75, + 82, 89, 9, 13, 71, 9, 12, 2, 69, 69, 70, 81, 71, 78, 81, 102, 80, 84, 93, + 3, 74, 83, 74, 0, 3, 5, 12, 8, 15, 0, 11, 10, 17, 21, 23, 23, 10, 54, + 34, 27, 21, 13, 11, 71, 78, 90, 4, 36, 30, 29, 21, 20, 11, 8, 3, 65, 74, + 0, 3, 5, 12, 8, 15, 0, 11, 10, 17, 21, 23, 23, 10, 54, 34, 27, 21, 13, + 11, 71, 78, 90, 75, 67, 2, 4, 4, 73, 3}, + + { + + 51, 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 8, 29, 44, 26, 6, 2, 12, 79, + 69, 0, 65, 14, 76, 67, 26, 24, 74, 92, 113, 83, 86, 71, 79, 69, 0, 84, 68, + 28, 3, 73, 82, 87, 83, 85, 89, 104, 64, 78, 88, 69, 83, 85, 99, 4, 73, 72, + 80, 23, 2, 22, 0, 0, 0, 70, 91, 97, 1, 5, 66, 20, 75, 99, 77, 67, 75, + 14, 12, 72, 67, 84, 7, 78, 72, 76, 95, 82, 86, 84, 17, 66, 0, 76, 72, 88, + 75, 85, 65, 76, 72, 86, 16, 72, 3, 80, 80, 71, 75, 67, 2, 2, 67, 7, 6, + 72, 1, 65, 68, 3, 72, 25, 67, 1, 22, 22, 30, 27, 19, 87, 65, 67, 64, 67, + 83, 68, 12, 68, 69, 9, 28, 50, 34, 9, 87, 86, 0, 35, 39, 108, 69, 4, 88, + 11, 66, 0, 10, 33, 55, 45, 18, 88, 92, 3, 21, 29, 96, 19, 21, 20, 19, 21, + 18, 12, 19, 19, 3, 10, 11, 4, 3, 68, 8, 4, 2, 6, 2, 6, 9, 2, 84, + 2, 2, 69, 3, 74, 33, 43, 28, 20, 26, 31, 36, 34, 20, 67, 78, 66, 2, 2, + 103, 2, 6, 81, 21, 7, 7, 7, 8, 2, 9, 10, 75, 93, 75, 77, 73, 94, 82, + 12, 14, 10, 4, 65, 71, 76, 76, 87, 71, 26, 13, 8, 0, 7, 66, 72, 77, 76, + 68, 26, 13, 8, 3, 8, 65, 70, 75, 83, 73, 25, 10, 8, 4, 3, 70, 75, 81, + 0, 34, 19, 14, 7, 12, 1, 68, 70, 73, 
62, 96, 92, 85, 89, 87, 85, 83, 81, + 80, 79, 77, 73, 80, 81, 85, 88, 5, 85, 80, 79, 76, 74, 74, 71, 71, 69, 71, + 70, 77, 68, 68, 10, 69, 77, 66, 68, 71, 64, 65, 67, 73, 64, 66, 72, 73, 2, + 86, 70, 10, 70, 1, 69, 67, 65, 3, 0, 67, 66, 74, 74, 76, 53, 50, 52, 47, + 43, 47, 47, 44, 42, 40, 43, 41, 31, 23, 8, 35, 32, 28, 19, 22, 17, 10, 16, + 9, 5, 65, 69, 74, 79, 30, 27, 30, 18, 2, 9, 5, 68, 5, 66, 70, 86, 77, + 84, 90, 8, 11, 73, 7, 10, 0, 71, 71, 72, 82, 72, 79, 82, 101, 81, 86, 95, + 2, 76, 84, 73, 0, 4, 5, 13, 9, 16, 0, 12, 11, 18, 21, 24, 24, 11, 53, + 32, 25, 19, 11, 9, 73, 80, 91, 5, 36, 30, 30, 21, 21, 11, 9, 4, 65, 73, + 0, 4, 5, 13, 9, 16, 0, 12, 11, 18, 21, 24, 24, 11, 53, 32, 25, 19, 11, + 9, 73, 80, 91, 75, 67, 2, 5, 5, 72, 6}, + + { + + 50, 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 6, 28, 43, 27, 6, 3, 10, 79, + 67, 64, 65, 15, 77, 68, 25, 22, 77, 95, 114, 81, 85, 71, 79, 67, 64, 84, 67, + 28, 3, 73, 81, 85, 84, 86, 89, 104, 64, 77, 87, 70, 83, 85, 99, 4, 73, 72, + 79, 24, 2, 22, 0, 0, 0, 70, 91, 97, 2, 4, 66, 19, 75, 98, 75, 66, 74, + 15, 13, 71, 66, 82, 8, 77, 71, 73, 95, 83, 86, 85, 17, 66, 1, 73, 71, 88, + 75, 83, 65, 76, 72, 86, 16, 72, 4, 79, 80, 70, 74, 67, 2, 3, 66, 7, 6, + 73, 1, 65, 68, 3, 72, 25, 67, 1, 23, 22, 30, 27, 19, 87, 65, 67, 64, 67, + 82, 68, 13, 67, 68, 11, 30, 51, 35, 11, 87, 87, 1, 36, 39, 109, 69, 5, 88, + 12, 66, 0, 11, 33, 55, 45, 20, 89, 90, 2, 18, 26, 95, 19, 21, 19, 19, 21, + 18, 11, 19, 19, 2, 9, 11, 4, 3, 68, 8, 4, 1, 5, 2, 6, 8, 2, 85, + 1, 1, 69, 2, 74, 31, 41, 26, 19, 25, 29, 33, 31, 19, 69, 80, 67, 0, 0, + 104, 1, 5, 82, 20, 6, 6, 5, 7, 0, 7, 9, 76, 93, 76, 78, 75, 92, 80, + 13, 14, 10, 4, 64, 70, 75, 76, 85, 69, 27, 14, 8, 1, 9, 65, 70, 76, 73, + 67, 27, 14, 9, 4, 9, 64, 70, 74, 82, 73, 26, 11, 8, 4, 4, 70, 75, 80, + 1, 34, 20, 14, 7, 13, 2, 67, 70, 72, 62, 95, 91, 84, 88, 86, 83, 82, 79, + 78, 77, 75, 70, 79, 80, 84, 87, 8, 84, 81, 78, 75, 73, 74, 71, 71, 69, 70, + 71, 78, 68, 68, 11, 69, 77, 66, 68, 70, 65, 66, 67, 
74, 65, 67, 72, 73, 3, + 87, 71, 11, 70, 0, 69, 67, 65, 2, 64, 67, 66, 74, 74, 77, 52, 49, 51, 46, + 40, 45, 44, 41, 39, 37, 40, 38, 28, 21, 6, 31, 29, 24, 14, 19, 15, 8, 14, + 6, 3, 66, 70, 75, 79, 28, 24, 27, 16, 0, 7, 4, 70, 3, 68, 72, 88, 79, + 85, 92, 7, 10, 75, 6, 9, 64, 73, 72, 73, 83, 73, 80, 82, 100, 83, 87, 96, + 0, 77, 85, 73, 1, 4, 6, 14, 10, 16, 1, 13, 12, 19, 22, 25, 24, 11, 53, + 30, 23, 17, 9, 7, 76, 82, 92, 5, 37, 31, 30, 22, 22, 12, 9, 4, 64, 73, + 1, 4, 6, 14, 10, 16, 1, 13, 12, 19, 22, 25, 24, 11, 53, 30, 23, 17, 9, + 7, 76, 82, 92, 75, 67, 2, 5, 5, 70, 8}, + + { + + 48, 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 4, 26, 42, 27, 6, 5, 8, 79, + 66, 64, 66, 16, 78, 70, 23, 20, 81, 97, 115, 79, 84, 71, 79, 66, 64, 84, 67, + 28, 2, 72, 80, 84, 85, 88, 90, 105, 64, 77, 86, 70, 84, 85, 99, 4, 73, 72, + 79, 24, 2, 22, 0, 0, 0, 69, 92, 97, 2, 3, 66, 19, 74, 97, 73, 65, 74, + 17, 15, 71, 65, 80, 8, 76, 70, 71, 96, 83, 87, 85, 17, 67, 1, 71, 71, 87, + 74, 82, 65, 76, 72, 86, 16, 72, 4, 78, 80, 70, 74, 67, 2, 3, 65, 8, 6, + 73, 0, 65, 68, 2, 72, 25, 67, 1, 23, 22, 30, 27, 19, 88, 64, 66, 65, 67, + 82, 68, 14, 67, 68, 12, 31, 52, 37, 13, 88, 88, 1, 36, 39, 110, 69, 5, 89, + 12, 66, 0, 12, 33, 55, 45, 21, 90, 89, 1, 15, 22, 94, 18, 21, 19, 18, 20, + 17, 11, 19, 18, 1, 8, 10, 4, 2, 69, 7, 3, 0, 5, 1, 5, 7, 1, 86, + 0, 0, 69, 0, 75, 29, 39, 24, 17, 23, 26, 29, 29, 18, 71, 83, 69, 66, 65, + 106, 64, 4, 84, 19, 5, 5, 4, 5, 65, 5, 7, 78, 93, 77, 79, 77, 91, 79, + 14, 15, 10, 4, 64, 69, 74, 75, 83, 68, 27, 14, 9, 1, 10, 64, 69, 75, 71, + 67, 27, 14, 9, 4, 9, 64, 69, 74, 81, 73, 27, 12, 8, 4, 5, 69, 75, 79, + 2, 34, 21, 14, 7, 14, 2, 66, 69, 72, 62, 94, 89, 82, 86, 85, 82, 80, 78, + 77, 76, 73, 68, 79, 79, 84, 86, 11, 84, 82, 78, 74, 73, 74, 71, 71, 70, 70, + 71, 79, 68, 68, 11, 69, 77, 67, 68, 70, 65, 66, 68, 75, 66, 67, 73, 73, 4, + 88, 71, 11, 71, 0, 69, 68, 65, 2, 64, 67, 65, 74, 74, 79, 51, 48, 50, 44, + 38, 42, 41, 38, 37, 34, 36, 35, 24, 18, 4, 27, 25, 20, 9, 16, 
13, 6, 11, + 4, 1, 68, 72, 76, 79, 25, 22, 25, 14, 66, 5, 2, 73, 1, 70, 74, 90, 82, + 87, 93, 5, 9, 78, 4, 7, 66, 75, 74, 75, 84, 74, 81, 82, 99, 84, 89, 98, + 64, 78, 86, 73, 1, 5, 6, 15, 10, 17, 1, 14, 12, 20, 22, 25, 25, 12, 52, + 28, 21, 15, 7, 5, 78, 84, 94, 6, 37, 31, 31, 22, 22, 12, 10, 4, 64, 73, + 1, 5, 6, 15, 10, 17, 1, 14, 12, 20, 22, 25, 25, 12, 52, 28, 21, 15, 7, + 5, 78, 84, 94, 75, 68, 1, 5, 5, 69, 10}, + + { + + 47, 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 2, 24, 42, 28, 6, 7, 7, 78, + 64, 64, 66, 17, 78, 71, 22, 18, 84, 99, 115, 77, 82, 70, 78, 64, 64, 83, 66, + 29, 2, 71, 79, 83, 86, 89, 90, 105, 64, 77, 85, 70, 84, 85, 99, 4, 72, 71, + 78, 24, 2, 22, 0, 0, 0, 68, 92, 97, 2, 2, 66, 19, 73, 95, 70, 64, 73, + 19, 17, 70, 64, 78, 9, 74, 69, 69, 97, 83, 87, 85, 18, 67, 2, 69, 71, 86, + 73, 80, 65, 75, 72, 85, 17, 72, 5, 77, 79, 69, 74, 67, 3, 4, 64, 9, 6, + 73, 0, 64, 67, 2, 72, 26, 67, 1, 23, 23, 31, 27, 20, 88, 0, 65, 66, 67, + 82, 68, 15, 67, 67, 13, 32, 53, 39, 15, 89, 89, 2, 36, 40, 111, 69, 5, 89, + 13, 66, 0, 14, 33, 55, 45, 22, 91, 88, 0, 13, 19, 92, 18, 21, 19, 17, 20, + 17, 11, 19, 18, 0, 7, 10, 4, 2, 69, 7, 2, 0, 5, 1, 5, 6, 0, 87, + 0, 0, 69, 64, 75, 27, 37, 23, 16, 21, 24, 26, 27, 17, 73, 85, 70, 68, 66, + 107, 65, 3, 86, 18, 4, 4, 3, 3, 66, 3, 5, 79, 93, 77, 80, 78, 89, 77, + 16, 16, 10, 4, 0, 68, 73, 74, 81, 67, 28, 15, 10, 2, 11, 0, 68, 74, 69, + 66, 28, 15, 10, 4, 10, 0, 68, 73, 79, 72, 29, 13, 9, 4, 6, 68, 74, 78, + 3, 35, 22, 15, 8, 15, 3, 65, 68, 71, 62, 92, 87, 80, 84, 84, 81, 78, 76, + 75, 74, 71, 66, 78, 78, 83, 85, 15, 84, 83, 78, 73, 72, 73, 71, 71, 70, 70, + 71, 80, 68, 68, 12, 69, 77, 67, 68, 70, 65, 66, 68, 75, 67, 67, 73, 72, 6, + 88, 71, 12, 72, 0, 69, 68, 64, 2, 64, 66, 64, 73, 73, 81, 50, 47, 49, 42, + 36, 39, 39, 35, 35, 32, 33, 33, 21, 15, 2, 23, 22, 17, 4, 13, 11, 5, 9, + 2, 0, 69, 73, 77, 79, 23, 20, 23, 12, 68, 3, 1, 75, 64, 72, 76, 92, 84, + 89, 94, 4, 8, 80, 3, 5, 67, 77, 75, 76, 85, 75, 81, 82, 98, 85, 90, 100, + 
65, 79, 87, 73, 2, 6, 7, 17, 11, 18, 1, 15, 13, 21, 23, 26, 26, 13, 51, + 27, 19, 13, 5, 3, 80, 86, 95, 7, 37, 32, 32, 23, 23, 13, 11, 5, 0, 73, + 2, 6, 7, 17, 11, 18, 1, 15, 13, 21, 23, 26, 26, 13, 51, 27, 19, 13, 5, + 3, 80, 86, 95, 75, 68, 1, 6, 6, 68, 12}, + + { + + 46, 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 0, 23, 41, 29, 6, 8, 5, + 78, 0, 65, 66, 18, 79, 72, 21, 16, 87, 102, 116, 75, 81, 70, 78, 0, 65, + 83, 65, 29, 2, 71, 78, 81, 87, 90, 90, 105, 64, 76, 84, 71, 84, 85, 99, + 4, 72, 71, 77, 25, 2, 22, 0, 0, 0, 68, 92, 97, 3, 1, 66, 18, 73, + 94, 68, 0, 72, 20, 18, 69, 0, 76, 10, 73, 68, 66, 97, 84, 87, 86, 18, + 67, 3, 66, 70, 86, 73, 78, 65, 75, 72, 85, 17, 72, 5, 76, 79, 68, 73, + 67, 3, 4, 0, 9, 6, 74, 0, 64, 67, 2, 72, 26, 67, 1, 24, 23, 31, + 27, 20, 89, 0, 65, 66, 67, 81, 68, 16, 66, 67, 15, 34, 54, 40, 17, 89, + 90, 2, 37, 40, 112, 69, 6, 90, 14, 66, 0, 15, 33, 55, 45, 24, 92, 86, + 64, 10, 16, 91, 18, 21, 18, 17, 19, 17, 10, 19, 17, 64, 6, 10, 4, 2, + 69, 6, 2, 64, 4, 1, 5, 5, 0, 88, 64, 64, 69, 65, 76, 25, 35, 21, + 14, 20, 22, 23, 24, 16, 75, 87, 72, 70, 68, 108, 66, 2, 87, 17, 3, 3, + 1, 2, 68, 1, 4, 81, 93, 78, 81, 80, 88, 76, 17, 16, 10, 4, 1, 67, + 72, 74, 79, 65, 29, 16, 10, 3, 13, 1, 66, 73, 66, 65, 28, 15, 10, 5, + 11, 1, 68, 72, 78, 72, 30, 14, 9, 4, 7, 68, 74, 77, 4, 35, 23, 15, + 8, 16, 4, 64, 68, 70, 62, 91, 86, 79, 83, 83, 79, 77, 74, 74, 72, 69, + 0, 77, 77, 82, 84, 18, 83, 84, 77, 72, 71, 73, 71, 71, 70, 69, 72, 81, + 68, 68, 13, 69, 77, 67, 68, 69, 66, 67, 68, 76, 68, 68, 73, 72, 7, 89, + 72, 13, 72, 64, 69, 68, 64, 1, 65, 66, 64, 73, 73, 82, 49, 46, 48, 41, + 33, 37, 36, 32, 32, 29, 30, 30, 18, 13, 0, 19, 18, 13, 64, 10, 9, 3, + 7, 64, 65, 70, 74, 78, 79, 21, 17, 20, 10, 70, 1, 64, 77, 66, 74, 78, + 94, 86, 90, 96, 3, 7, 82, 1, 4, 69, 79, 76, 77, 86, 76, 82, 82, 97, + 87, 91, 101, 67, 80, 88, 73, 2, 6, 8, 18, 12, 18, 2, 16, 14, 22, 23, + 27, 26, 13, 51, 25, 17, 11, 3, 1, 83, 88, 96, 7, 38, 32, 32, 24, 24, + 13, 11, 5, 1, 73, 2, 6, 8, 
18, 12, 18, 2, 16, 14, 22, 23, 27, 26, + 13, 51, 25, 17, 11, 3, 1, 83, 88, 96, 75, 68, 1, 6, 6, 66, 14}, + + { + + 45, 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 64, 21, 40, 29, 7, 10, 4, + 78, 2, 65, 67, 19, 80, 73, 20, 15, 91, 104, 117, 73, 79, 70, 78, 2, 65, + 82, 64, 30, 2, 70, 77, 80, 89, 91, 90, 106, 64, 76, 84, 71, 85, 84, 99, + 3, 72, 71, 76, 25, 2, 22, 0, 0, 0, 67, 92, 97, 3, 1, 67, 18, 72, + 93, 66, 1, 71, 22, 20, 68, 1, 74, 11, 72, 68, 64, 98, 84, 87, 86, 19, + 67, 4, 64, 70, 85, 72, 76, 65, 75, 72, 85, 18, 73, 6, 76, 79, 67, 73, + 66, 3, 5, 1, 10, 6, 74, 0, 64, 67, 2, 72, 26, 67, 1, 24, 23, 32, + 28, 21, 89, 1, 64, 67, 67, 81, 68, 17, 66, 66, 16, 35, 54, 42, 18, 90, + 91, 3, 37, 40, 112, 70, 6, 90, 14, 67, 1, 16, 33, 55, 46, 25, 93, 85, + 65, 7, 13, 89, 18, 21, 18, 16, 19, 16, 10, 18, 17, 64, 6, 10, 3, 2, + 70, 6, 1, 65, 4, 1, 5, 5, 64, 90, 64, 65, 69, 66, 76, 22, 33, 19, + 13, 18, 20, 20, 22, 14, 77, 89, 73, 72, 70, 110, 67, 0, 89, 15, 2, 2, + 0, 0, 70, 64, 2, 82, 93, 79, 82, 81, 86, 74, 19, 17, 10, 4, 2, 67, + 72, 73, 78, 64, 30, 17, 11, 3, 14, 2, 65, 72, 64, 65, 29, 16, 11, 5, + 12, 1, 67, 71, 77, 72, 32, 15, 9, 4, 7, 67, 74, 77, 4, 36, 23, 16, + 8, 17, 4, 64, 67, 70, 62, 89, 84, 77, 81, 81, 78, 75, 73, 72, 70, 67, + 2, 77, 76, 82, 84, 21, 83, 85, 77, 72, 70, 73, 71, 71, 70, 69, 72, 82, + 68, 68, 13, 69, 77, 67, 68, 69, 66, 67, 68, 77, 69, 68, 73, 72, 8, 90, + 72, 14, 73, 64, 69, 68, 0, 1, 65, 65, 0, 73, 73, 84, 48, 45, 47, 39, + 31, 34, 34, 29, 30, 26, 27, 27, 15, 10, 65, 16, 15, 9, 69, 8, 6, 1, + 5, 66, 67, 71, 75, 79, 79, 19, 15, 18, 8, 72, 0, 65, 79, 68, 77, 80, + 96, 88, 92, 97, 2, 5, 84, 0, 2, 70, 81, 78, 79, 87, 77, 83, 83, 96, + 88, 93, 103, 68, 82, 89, 72, 3, 7, 8, 19, 13, 19, 2, 17, 15, 23, 24, + 28, 27, 14, 50, 23, 15, 9, 1, 64, 85, 90, 97, 8, 38, 33, 33, 24, 25, + 14, 12, 6, 1, 72, 3, 7, 8, 19, 13, 19, 2, 17, 15, 23, 24, 28, 27, + 14, 50, 23, 15, 9, 1, 64, 85, 90, 97, 75, 68, 1, 7, 7, 65, 17}, + + { + + 43, 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 
66, 20, 39, 30, 7, 12, 2, + 78, 3, 65, 67, 20, 81, 75, 18, 13, 94, 106, 118, 71, 78, 70, 78, 3, 65, + 82, 0, 30, 1, 69, 76, 79, 90, 93, 91, 106, 64, 76, 83, 71, 85, 84, 99, + 3, 72, 71, 76, 26, 2, 22, 0, 0, 0, 67, 93, 97, 4, 0, 67, 17, 72, + 92, 64, 2, 70, 23, 21, 68, 2, 72, 12, 71, 67, 1, 99, 84, 87, 86, 19, + 68, 5, 1, 70, 84, 71, 74, 65, 75, 72, 85, 18, 73, 6, 75, 79, 67, 73, + 66, 3, 5, 2, 10, 6, 74, 64, 64, 67, 2, 72, 26, 67, 1, 25, 23, 32, + 28, 21, 90, 1, 0, 67, 67, 81, 68, 18, 66, 66, 17, 36, 55, 43, 20, 90, + 92, 3, 37, 40, 113, 70, 7, 91, 15, 67, 1, 17, 33, 55, 46, 26, 94, 83, + 66, 4, 10, 88, 17, 21, 17, 15, 18, 16, 9, 18, 16, 65, 5, 9, 3, 2, + 70, 5, 0, 66, 3, 0, 4, 4, 64, 91, 65, 66, 69, 68, 77, 20, 31, 17, + 11, 16, 18, 16, 19, 13, 79, 92, 75, 74, 72, 111, 69, 64, 91, 14, 1, 1, + 64, 64, 72, 66, 0, 84, 93, 80, 83, 83, 85, 73, 20, 17, 10, 4, 3, 66, + 71, 72, 76, 0, 31, 17, 12, 4, 15, 3, 0, 71, 1, 64, 29, 16, 11, 6, + 13, 2, 67, 71, 76, 72, 33, 16, 9, 4, 8, 67, 74, 76, 5, 36, 24, 16, + 8, 18, 5, 0, 67, 69, 62, 88, 83, 76, 79, 80, 76, 74, 71, 71, 69, 65, + 4, 76, 75, 81, 83, 24, 83, 86, 77, 71, 70, 73, 71, 71, 71, 68, 72, 83, + 68, 68, 14, 69, 77, 67, 68, 69, 67, 67, 68, 78, 70, 68, 73, 72, 9, 91, + 72, 14, 74, 64, 69, 68, 0, 1, 66, 65, 1, 73, 73, 85, 47, 44, 46, 37, + 29, 32, 31, 26, 27, 23, 23, 24, 11, 7, 67, 12, 11, 5, 74, 5, 4, 64, + 3, 69, 69, 73, 76, 80, 79, 16, 13, 16, 6, 74, 65, 67, 81, 70, 79, 82, + 98, 91, 94, 98, 1, 4, 87, 65, 0, 72, 83, 79, 80, 88, 78, 84, 83, 95, + 89, 94, 104, 69, 83, 90, 72, 3, 7, 9, 20, 13, 20, 2, 18, 16, 24, 24, + 29, 27, 15, 50, 21, 13, 7, 64, 66, 87, 92, 99, 8, 39, 33, 34, 25, 25, + 14, 12, 6, 2, 72, 3, 7, 9, 20, 13, 20, 2, 18, 16, 24, 24, 29, 27, + 15, 50, 21, 13, 7, 64, 66, 87, 92, 99, 75, 68, 1, 7, 7, 0, 19}, + + { + + 42, 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 68, 18, 38, 31, 7, 13, 0, + 77, 5, 66, 67, 21, 82, 76, 17, 11, 97, 109, 118, 69, 77, 70, 77, 5, 66, + 82, 1, 30, 1, 69, 75, 77, 91, 94, 91, 106, 64, 75, 82, 72, 
85, 84, 99, + 3, 71, 71, 75, 26, 2, 22, 0, 0, 0, 66, 93, 97, 4, 64, 67, 17, 71, + 90, 2, 3, 69, 25, 23, 67, 3, 70, 13, 70, 66, 4, 99, 85, 87, 87, 19, + 68, 6, 4, 69, 84, 71, 72, 65, 75, 72, 84, 18, 73, 7, 74, 78, 66, 72, + 66, 4, 6, 3, 11, 6, 75, 64, 0, 67, 2, 72, 27, 67, 1, 25, 24, 32, + 28, 21, 90, 2, 0, 68, 67, 80, 68, 19, 65, 65, 19, 38, 56, 45, 22, 91, + 93, 4, 38, 40, 114, 70, 7, 91, 16, 67, 1, 19, 33, 55, 46, 28, 95, 82, + 67, 2, 7, 87, 17, 21, 17, 15, 18, 16, 9, 18, 16, 66, 4, 9, 3, 2, + 70, 5, 0, 67, 3, 0, 4, 3, 65, 92, 66, 66, 69, 69, 77, 18, 29, 16, + 10, 15, 16, 13, 17, 12, 81, 94, 76, 76, 74, 112, 70, 65, 92, 13, 0, 0, + 66, 66, 74, 68, 64, 85, 93, 81, 84, 85, 83, 71, 21, 18, 10, 4, 4, 65, + 70, 72, 74, 2, 32, 18, 12, 5, 17, 4, 1, 70, 4, 0, 30, 17, 12, 6, + 14, 3, 66, 70, 75, 71, 34, 17, 9, 4, 9, 66, 73, 75, 6, 36, 25, 16, + 8, 19, 6, 1, 66, 68, 62, 87, 81, 74, 78, 79, 75, 72, 69, 69, 67, 0, + 7, 75, 74, 80, 82, 27, 82, 87, 76, 70, 69, 73, 71, 71, 71, 68, 73, 84, + 68, 68, 15, 69, 77, 67, 68, 68, 67, 68, 68, 79, 71, 69, 73, 72, 10, 91, + 73, 15, 74, 65, 69, 68, 0, 0, 66, 65, 1, 73, 73, 87, 46, 43, 45, 36, + 26, 29, 28, 23, 25, 20, 20, 21, 8, 5, 69, 8, 8, 1, 79, 2, 2, 65, + 1, 71, 71, 74, 77, 81, 79, 14, 10, 13, 4, 76, 67, 68, 83, 72, 81, 84, + 100, 93, 95, 100, 0, 3, 89, 66, 64, 73, 85, 80, 81, 89, 79, 85, 83, 94, + 91, 95, 106, 71, 84, 91, 72, 4, 8, 10, 21, 14, 20, 3, 19, 17, 25, 25, + 30, 28, 15, 49, 19, 11, 5, 66, 68, 90, 94, 100, 9, 39, 34, 34, 26, 26, + 15, 13, 6, 3, 72, 4, 8, 10, 21, 14, 20, 3, 19, 17, 25, 25, 30, 28, + 15, 49, 19, 11, 5, 66, 68, 90, 94, 100, 75, 68, 1, 7, 7, 1, 21}, + + { + + 41, 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 70, 17, 37, 31, 7, 15, 64, + 77, 6, 66, 68, 22, 83, 77, 16, 9, 101, 111, 119, 67, 75, 70, 77, 6, 66, + 81, 2, 31, 1, 68, 74, 76, 92, 95, 91, 107, 64, 75, 81, 72, 86, 84, 99, + 3, 71, 71, 74, 27, 2, 22, 0, 0, 0, 66, 93, 97, 5, 65, 67, 16, 71, + 89, 4, 4, 68, 26, 24, 66, 4, 68, 14, 69, 65, 6, 100, 85, 87, 87, 20, + 68, 
7, 6, 69, 83, 70, 70, 65, 75, 72, 84, 19, 73, 7, 73, 78, 65, 72, + 66, 4, 6, 4, 11, 6, 75, 64, 0, 67, 2, 72, 27, 67, 1, 26, 24, 33, + 28, 22, 91, 2, 1, 68, 67, 80, 68, 20, 65, 65, 20, 39, 57, 46, 24, 91, + 94, 4, 38, 40, 115, 70, 8, 92, 16, 67, 1, 20, 33, 55, 46, 29, 96, 80, + 68, 64, 4, 85, 17, 21, 16, 14, 17, 15, 8, 18, 15, 67, 3, 9, 3, 2, + 71, 4, 64, 68, 2, 0, 4, 2, 65, 93, 66, 67, 69, 70, 78, 16, 27, 14, + 8, 13, 14, 10, 14, 11, 83, 96, 78, 78, 76, 114, 71, 66, 94, 12, 64, 64, + 67, 67, 76, 70, 66, 87, 93, 82, 85, 86, 82, 70, 23, 18, 10, 4, 5, 64, + 69, 71, 72, 3, 33, 19, 13, 5, 18, 5, 3, 69, 6, 0, 30, 17, 12, 7, + 15, 3, 66, 69, 74, 71, 36, 18, 9, 4, 10, 66, 73, 74, 7, 37, 26, 17, + 8, 20, 6, 2, 66, 68, 62, 85, 80, 73, 76, 78, 73, 71, 68, 68, 65, 2, + 9, 75, 73, 80, 81, 30, 82, 88, 76, 69, 68, 73, 71, 71, 71, 67, 73, 85, + 68, 68, 15, 69, 77, 67, 68, 68, 68, 68, 68, 80, 72, 69, 73, 72, 11, 92, + 73, 16, 75, 65, 69, 68, 1, 0, 67, 64, 2, 73, 73, 88, 45, 42, 44, 34, + 24, 27, 26, 20, 22, 17, 17, 18, 5, 2, 71, 4, 4, 66, 84, 64, 0, 67, + 64, 74, 73, 75, 78, 82, 79, 12, 8, 11, 2, 78, 69, 70, 85, 74, 83, 86, + 102, 95, 97, 101, 64, 2, 91, 68, 66, 75, 87, 82, 83, 90, 80, 86, 83, 93, + 92, 97, 107, 72, 85, 92, 72, 4, 8, 10, 22, 15, 21, 3, 20, 18, 26, 25, + 31, 28, 16, 49, 17, 9, 3, 68, 70, 92, 96, 101, 9, 40, 34, 35, 26, 27, + 15, 13, 7, 3, 72, 4, 8, 10, 22, 15, 21, 3, 20, 18, 26, 25, 31, 28, + 16, 49, 17, 9, 3, 68, 70, 92, 96, 101, 75, 68, 1, 8, 8, 3, 23}, + + { + + 40, 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 72, 15, 36, 32, 7, 17, 66, + 77, 8, 66, 68, 23, 84, 78, 15, 7, 104, 113, 120, 65, 74, 70, 77, 8, 66, + 81, 3, 31, 1, 67, 73, 75, 93, 96, 91, 107, 64, 75, 80, 72, 86, 84, 99, + 3, 71, 71, 73, 27, 2, 22, 0, 0, 0, 65, 93, 97, 5, 66, 67, 16, 70, + 88, 6, 5, 67, 28, 26, 65, 5, 66, 15, 68, 64, 8, 101, 85, 87, 87, 20, + 68, 8, 8, 69, 82, 69, 68, 65, 75, 72, 84, 19, 73, 8, 72, 78, 64, 72, + 66, 4, 7, 5, 12, 6, 75, 64, 0, 67, 2, 72, 27, 67, 1, 26, 24, 33, + 28, 22, 91, 3, 
2, 69, 67, 80, 68, 21, 65, 64, 21, 40, 58, 48, 26, 92, + 95, 5, 38, 40, 116, 70, 8, 92, 17, 67, 1, 21, 33, 55, 46, 30, 97, 79, + 69, 67, 1, 84, 17, 21, 16, 13, 17, 15, 8, 18, 15, 68, 2, 9, 3, 2, + 71, 4, 65, 69, 2, 0, 4, 1, 66, 94, 67, 68, 69, 71, 78, 14, 25, 12, + 7, 11, 12, 7, 12, 10, 85, 98, 79, 80, 78, 115, 72, 67, 96, 11, 65, 65, + 68, 69, 78, 72, 68, 88, 93, 83, 86, 88, 80, 68, 24, 19, 10, 4, 6, 0, + 68, 70, 70, 4, 34, 20, 14, 6, 19, 6, 4, 68, 8, 1, 31, 18, 13, 7, + 16, 4, 65, 68, 73, 71, 37, 19, 9, 4, 11, 65, 73, 73, 8, 37, 27, 17, + 8, 21, 7, 3, 65, 67, 62, 84, 78, 71, 74, 77, 72, 69, 66, 66, 0, 4, + 11, 74, 72, 79, 80, 33, 82, 89, 76, 68, 67, 73, 71, 71, 71, 67, 73, 86, + 68, 68, 16, 69, 77, 67, 68, 68, 68, 68, 68, 81, 73, 69, 73, 72, 12, 93, + 73, 17, 76, 65, 69, 68, 1, 0, 67, 64, 3, 73, 73, 90, 44, 41, 43, 32, + 22, 24, 23, 17, 20, 14, 14, 15, 2, 64, 73, 0, 1, 70, 89, 67, 65, 69, + 66, 76, 75, 76, 79, 83, 79, 10, 6, 9, 0, 80, 71, 71, 87, 76, 85, 88, + 104, 97, 99, 102, 65, 1, 93, 69, 68, 76, 89, 83, 84, 91, 81, 87, 83, 92, + 93, 98, 109, 73, 86, 93, 72, 5, 9, 11, 23, 16, 22, 3, 21, 19, 27, 26, + 32, 29, 17, 48, 15, 7, 1, 70, 72, 94, 98, 102, 10, 40, 35, 36, 27, 28, + 16, 14, 7, 4, 72, 5, 9, 11, 23, 16, 22, 3, 21, 19, 27, 26, 32, 29, + 17, 48, 15, 7, 1, 70, 72, 94, 98, 102, 75, 68, 1, 8, 8, 4, 25}, + + { + + 38, 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 74, 13, 35, 32, 7, 18, + 68, 77, 9, 67, 69, 24, 85, 80, 13, 5, 108, 116, 121, 0, 73, 70, 77, + 9, 67, 81, 3, 31, 0, 67, 73, 74, 95, 98, 92, 108, 65, 75, 80, 73, + 87, 84, 99, 2, 71, 71, 73, 27, 1, 22, 0, 0, 0, 65, 94, 97, 5, + 67, 68, 15, 70, 87, 8, 6, 67, 29, 27, 65, 6, 65, 15, 67, 64, 10, + 102, 86, 88, 88, 20, 69, 8, 10, 69, 82, 69, 67, 65, 75, 72, 84, 19, + 74, 8, 72, 78, 64, 72, 66, 4, 7, 6, 12, 6, 76, 65, 0, 67, 1, + 72, 27, 67, 1, 26, 24, 33, 28, 22, 92, 3, 2, 70, 67, 80, 69, 21, + 65, 64, 22, 41, 58, 49, 27, 93, 97, 5, 38, 40, 117, 71, 8, 93, 17, + 68, 1, 22, 33, 54, 46, 31, 98, 78, 71, 70, 66, 83, 16, 
21, 15, 12, + 16, 14, 7, 17, 14, 69, 1, 8, 2, 1, 72, 3, 66, 70, 1, 64, 3, + 0, 67, 96, 68, 69, 69, 73, 79, 11, 22, 10, 5, 9, 9, 3, 9, 8, + 87, 101, 81, 83, 80, 117, 74, 69, 98, 9, 66, 66, 70, 71, 80, 74, 70, + 90, 93, 84, 87, 90, 79, 67, 25, 19, 10, 4, 6, 0, 68, 70, 69, 5, + 34, 20, 14, 6, 20, 7, 5, 68, 10, 1, 31, 18, 13, 7, 16, 4, 65, + 68, 72, 71, 38, 20, 9, 3, 11, 65, 73, 73, 8, 37, 27, 17, 8, 22, + 7, 3, 65, 67, 62, 83, 77, 70, 73, 76, 71, 68, 65, 65, 1, 5, 13, + 74, 72, 79, 80, 36, 82, 91, 76, 68, 67, 73, 71, 71, 72, 67, 74, 87, + 69, 68, 16, 70, 77, 68, 68, 68, 69, 69, 69, 82, 74, 70, 74, 72, 13, + 94, 74, 17, 77, 66, 69, 69, 1, 64, 68, 64, 3, 73, 73, 92, 42, 40, + 41, 30, 19, 21, 20, 14, 17, 11, 10, 12, 65, 67, 75, 67, 66, 74, 95, + 70, 68, 71, 69, 79, 77, 78, 81, 84, 79, 7, 3, 6, 65, 83, 73, 73, + 90, 78, 88, 90, 107, 100, 101, 104, 67, 64, 96, 71, 70, 78, 91, 85, 86, + 92, 82, 88, 84, 91, 95, 100, 111, 75, 88, 95, 72, 5, 9, 11, 24, 16, + 22, 3, 22, 19, 28, 26, 32, 29, 17, 47, 13, 5, 65, 73, 74, 97, 100, + 104, 10, 40, 35, 36, 27, 28, 16, 14, 7, 4, 72, 5, 9, 11, 24, 16, + 22, 3, 22, 19, 28, 26, 32, 29, 17, 47, 13, 5, 65, 73, 74, 97, 100, + 104, 75, 69, 0, 8, 8, 5, 27}, + + { + + 37, 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 75, 12, 35, 33, 8, 20, + 69, 76, 11, 67, 69, 26, 85, 81, 12, 4, 111, 118, 121, 2, 71, 69, 76, + 11, 67, 80, 4, 32, 0, 66, 72, 72, 96, 99, 92, 108, 65, 74, 79, 73, + 87, 83, 98, 2, 70, 70, 72, 28, 1, 22, 0, 0, 0, 64, 94, 97, 6, + 67, 68, 15, 69, 85, 11, 8, 66, 31, 29, 64, 8, 0, 16, 65, 0, 13, + 102, 86, 88, 88, 21, 69, 9, 13, 68, 81, 68, 65, 65, 74, 72, 83, 20, + 74, 9, 71, 77, 0, 71, 65, 5, 8, 7, 13, 7, 76, 65, 1, 66, 1, + 71, 28, 66, 1, 27, 25, 34, 29, 23, 92, 4, 3, 70, 67, 79, 69, 22, + 64, 0, 24, 43, 59, 51, 29, 93, 98, 6, 39, 41, 117, 71, 9, 93, 18, + 68, 2, 24, 33, 54, 47, 33, 99, 76, 72, 72, 69, 81, 16, 21, 15, 12, + 16, 14, 7, 17, 14, 69, 1, 8, 2, 1, 72, 3, 66, 70, 1, 64, 3, + 0, 67, 97, 68, 69, 69, 74, 79, 9, 20, 9, 4, 8, 7, 0, 7, 7, 
+ 88, 103, 82, 85, 81, 118, 75, 70, 99, 8, 66, 66, 71, 72, 81, 75, 71, + 91, 93, 84, 87, 91, 77, 65, 27, 20, 10, 4, 7, 1, 67, 69, 67, 7, + 35, 21, 15, 7, 22, 8, 7, 67, 13, 2, 32, 19, 14, 8, 17, 5, 64, + 67, 70, 70, 40, 21, 10, 3, 12, 64, 72, 72, 9, 38, 28, 18, 9, 23, + 8, 4, 64, 66, 62, 81, 75, 68, 71, 74, 69, 66, 0, 0, 3, 7, 16, + 73, 71, 78, 79, 40, 81, 92, 75, 67, 66, 72, 71, 70, 72, 66, 74, 87, + 69, 68, 17, 70, 77, 68, 68, 67, 69, 69, 69, 82, 74, 70, 74, 71, 15, + 94, 74, 18, 77, 66, 69, 69, 2, 64, 68, 0, 4, 72, 72, 93, 41, 39, + 40, 29, 17, 19, 18, 11, 15, 9, 7, 10, 68, 69, 77, 70, 69, 77, 100, + 72, 70, 72, 71, 81, 78, 79, 82, 84, 79, 5, 1, 4, 67, 85, 74, 74, + 92, 79, 90, 91, 109, 102, 102, 105, 68, 65, 98, 72, 71, 79, 92, 86, 87, + 93, 82, 88, 84, 90, 96, 101, 112, 76, 89, 96, 71, 6, 10, 12, 26, 17, + 23, 4, 24, 20, 29, 27, 33, 30, 18, 47, 12, 4, 67, 75, 75, 99, 101, + 105, 11, 41, 36, 37, 28, 29, 17, 15, 8, 5, 71, 6, 10, 12, 26, 17, + 23, 4, 24, 20, 29, 27, 33, 30, 18, 47, 12, 4, 67, 75, 75, 99, 101, + 105, 75, 69, 0, 9, 9, 7, 30}, + + { + + 36, 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 77, 10, 34, 34, 8, 22, + 71, 76, 12, 67, 69, 27, 86, 82, 11, 2, 114, 120, 122, 4, 70, 69, 76, + 12, 67, 80, 5, 32, 0, 65, 71, 71, 97, 100, 92, 108, 65, 74, 78, 73, + 87, 83, 98, 2, 70, 70, 71, 28, 1, 22, 0, 0, 0, 0, 94, 97, 6, + 68, 68, 15, 68, 84, 13, 9, 65, 33, 31, 0, 9, 2, 17, 64, 1, 15, + 103, 86, 88, 88, 21, 69, 10, 15, 68, 80, 67, 0, 65, 74, 72, 83, 20, + 74, 9, 70, 77, 1, 71, 65, 5, 8, 8, 14, 7, 76, 65, 1, 66, 1, + 71, 28, 66, 1, 27, 25, 34, 29, 23, 93, 5, 4, 71, 67, 79, 69, 23, + 64, 0, 25, 44, 60, 53, 31, 94, 99, 6, 39, 41, 118, 71, 9, 94, 19, + 68, 2, 25, 33, 54, 47, 34, 100, 75, 73, 75, 72, 80, 16, 21, 15, 11, + 15, 14, 7, 17, 13, 70, 0, 8, 2, 1, 72, 2, 67, 71, 1, 64, 3, + 64, 68, 98, 69, 70, 69, 75, 80, 7, 18, 7, 2, 6, 5, 66, 5, 6, + 90, 105, 84, 87, 83, 119, 76, 71, 101, 7, 67, 67, 72, 74, 83, 77, 73, + 93, 93, 85, 88, 93, 76, 64, 28, 21, 10, 4, 8, 2, 66, 68, 65, 
8, + 36, 22, 16, 8, 23, 9, 8, 66, 15, 3, 32, 19, 14, 8, 18, 6, 0, + 66, 69, 70, 41, 22, 10, 3, 13, 0, 72, 71, 10, 38, 29, 18, 9, 24, + 9, 5, 0, 65, 62, 80, 73, 66, 69, 73, 68, 64, 2, 1, 5, 9, 18, + 72, 70, 77, 78, 43, 81, 93, 75, 66, 65, 72, 71, 70, 72, 66, 74, 88, + 69, 68, 18, 70, 77, 68, 68, 67, 69, 69, 69, 83, 75, 70, 74, 71, 16, + 95, 74, 19, 78, 66, 69, 69, 2, 64, 68, 0, 5, 72, 72, 95, 40, 38, + 39, 27, 15, 16, 15, 8, 13, 6, 4, 7, 71, 72, 79, 74, 73, 81, 105, + 75, 72, 74, 73, 83, 80, 80, 83, 85, 79, 3, 64, 2, 69, 87, 76, 76, + 94, 81, 92, 93, 111, 104, 104, 106, 69, 66, 100, 74, 73, 81, 94, 87, 88, + 94, 83, 89, 84, 89, 97, 102, 114, 77, 90, 97, 71, 6, 11, 13, 27, 18, + 24, 4, 25, 21, 30, 27, 34, 31, 19, 46, 10, 2, 69, 77, 77, 101, 103, + 106, 12, 41, 36, 38, 29, 30, 17, 16, 8, 6, 71, 6, 11, 13, 27, 18, + 24, 4, 25, 21, 30, 27, 34, 31, 19, 46, 10, 2, 69, 77, 77, 101, 103, + 106, 75, 69, 0, 9, 9, 8, 32}, + + { + + 35, 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 79, 9, 33, 34, 8, 24, + 72, 76, 14, 67, 70, 28, 87, 83, 10, 0, 118, 122, 123, 6, 68, 69, 76, + 14, 67, 79, 6, 33, 0, 64, 70, 70, 98, 101, 92, 109, 65, 74, 77, 73, + 88, 83, 98, 2, 70, 70, 70, 29, 1, 22, 0, 0, 0, 0, 94, 97, 7, + 69, 68, 14, 68, 83, 15, 10, 64, 34, 32, 1, 10, 4, 18, 0, 2, 17, + 104, 86, 88, 88, 22, 69, 11, 17, 68, 79, 66, 2, 65, 74, 72, 83, 21, + 74, 10, 69, 77, 2, 71, 65, 5, 9, 9, 14, 7, 76, 65, 1, 66, 1, + 71, 28, 66, 1, 28, 25, 35, 29, 24, 93, 5, 5, 71, 67, 79, 69, 24, + 64, 1, 26, 45, 61, 54, 33, 94, 100, 7, 39, 41, 119, 71, 10, 94, 19, + 68, 2, 26, 33, 54, 47, 35, 101, 73, 74, 78, 75, 78, 16, 21, 14, 10, + 15, 13, 6, 17, 13, 71, 64, 8, 2, 1, 73, 2, 68, 72, 0, 64, 3, + 65, 68, 99, 69, 71, 69, 76, 80, 5, 16, 5, 1, 4, 3, 69, 2, 5, + 92, 107, 85, 89, 85, 121, 77, 72, 103, 6, 68, 68, 73, 75, 85, 79, 75, + 94, 93, 86, 89, 94, 74, 1, 30, 21, 10, 4, 9, 3, 65, 67, 0, 9, + 37, 23, 17, 8, 24, 10, 10, 65, 17, 3, 33, 20, 15, 9, 19, 6, 0, + 65, 68, 70, 43, 23, 10, 3, 14, 0, 72, 70, 11, 39, 30, 19, 9, 
25, + 9, 6, 0, 65, 62, 78, 72, 65, 67, 72, 66, 0, 3, 3, 7, 11, 20, + 72, 69, 77, 77, 46, 81, 94, 75, 65, 64, 72, 71, 70, 72, 65, 74, 89, + 69, 68, 18, 70, 77, 68, 68, 67, 70, 69, 69, 84, 76, 70, 74, 71, 17, + 96, 74, 20, 79, 66, 69, 69, 3, 64, 69, 1, 6, 72, 72, 96, 39, 37, + 38, 25, 13, 14, 13, 5, 10, 3, 1, 4, 74, 75, 81, 78, 76, 85, 110, + 78, 74, 76, 75, 86, 82, 81, 84, 86, 79, 1, 66, 0, 71, 89, 78, 77, + 96, 83, 94, 95, 113, 106, 106, 107, 70, 67, 102, 75, 75, 82, 96, 89, 90, + 95, 84, 90, 84, 88, 98, 104, 115, 78, 91, 98, 71, 7, 11, 13, 28, 19, + 25, 4, 26, 22, 31, 28, 35, 31, 20, 46, 8, 0, 71, 79, 79, 103, 105, + 107, 12, 42, 37, 39, 29, 31, 18, 16, 9, 6, 71, 7, 11, 13, 28, 19, + 25, 4, 26, 22, 31, 28, 35, 31, 20, 46, 8, 0, 71, 79, 79, 103, 105, + 107, 75, 69, 0, 10, 10, 10, 34}, + + { + + 33, 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 81, 7, 32, 35, 8, 25, + 74, 76, 15, 68, 70, 29, 88, 85, 8, 65, 121, 125, 124, 8, 67, 69, 76, + 15, 68, 79, 7, 33, 64, 64, 69, 68, 99, 103, 93, 109, 65, 73, 76, 74, + 88, 83, 98, 2, 70, 70, 70, 29, 1, 22, 0, 0, 0, 1, 95, 97, 7, + 70, 68, 14, 67, 82, 17, 11, 0, 36, 34, 1, 11, 6, 19, 1, 3, 20, + 104, 87, 88, 89, 22, 70, 12, 20, 67, 79, 66, 4, 65, 74, 72, 83, 21, + 74, 10, 68, 77, 2, 70, 65, 5, 9, 10, 15, 7, 77, 66, 1, 66, 1, + 71, 28, 66, 1, 28, 25, 35, 29, 24, 94, 6, 5, 72, 67, 78, 69, 25, + 0, 1, 28, 47, 62, 56, 35, 95, 101, 7, 40, 41, 120, 71, 10, 95, 20, + 68, 2, 27, 33, 54, 47, 37, 102, 72, 75, 81, 78, 77, 15, 21, 14, 10, + 14, 13, 6, 17, 12, 72, 65, 7, 2, 1, 73, 1, 68, 73, 0, 65, 2, + 66, 69, 100, 70, 72, 69, 78, 81, 3, 14, 3, 64, 3, 1, 73, 0, 4, + 94, 110, 87, 91, 87, 122, 79, 73, 104, 5, 69, 69, 75, 77, 87, 81, 76, + 96, 93, 87, 90, 96, 73, 2, 31, 22, 10, 4, 10, 4, 64, 67, 2, 11, + 38, 23, 17, 9, 26, 11, 11, 64, 20, 4, 33, 20, 15, 9, 20, 7, 1, + 65, 67, 70, 44, 24, 10, 3, 15, 1, 72, 69, 12, 39, 31, 19, 9, 26, + 10, 7, 1, 64, 62, 77, 70, 0, 66, 71, 65, 2, 5, 4, 8, 13, 23, + 71, 68, 76, 76, 49, 80, 95, 74, 64, 64, 72, 71, 70, 73, 
65, 75, 90, + 69, 68, 19, 70, 77, 68, 68, 66, 70, 70, 69, 85, 77, 71, 74, 71, 18, + 97, 75, 20, 79, 67, 69, 69, 3, 65, 69, 1, 6, 72, 72, 98, 38, 36, + 37, 24, 10, 11, 10, 2, 8, 0, 66, 1, 78, 77, 83, 82, 80, 89, 115, + 81, 76, 78, 77, 88, 84, 83, 85, 87, 79, 65, 69, 66, 73, 91, 80, 79, + 98, 85, 96, 97, 115, 109, 107, 109, 71, 68, 105, 77, 76, 84, 98, 90, 91, + 96, 85, 91, 84, 87, 100, 105, 117, 80, 92, 99, 71, 7, 12, 14, 29, 19, + 25, 5, 27, 23, 32, 28, 36, 32, 20, 45, 6, 65, 73, 81, 81, 106, 107, + 109, 13, 42, 37, 39, 30, 31, 18, 17, 9, 7, 71, 7, 12, 14, 29, 19, + 25, 5, 27, 23, 32, 28, 36, 32, 20, 45, 6, 65, 73, 81, 81, 106, 107, + 109, 75, 69, 0, 10, 10, 11, 36}, + + { + + 32, 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 83, 6, 31, 36, 8, + 27, 76, 75, 17, 68, 70, 30, 89, 86, 7, 67, 124, 126, 124, 10, 66, + 69, 75, 17, 68, 79, 8, 33, 64, 0, 68, 67, 100, 104, 93, 109, 65, + 73, 75, 74, 88, 83, 98, 2, 69, 70, 69, 30, 1, 22, 0, 0, 0, + 1, 95, 97, 8, 71, 68, 13, 67, 80, 20, 12, 1, 37, 35, 2, 12, + 8, 20, 2, 4, 22, 105, 87, 88, 89, 22, 70, 13, 22, 67, 78, 65, + 6, 65, 74, 72, 82, 21, 74, 11, 67, 76, 3, 70, 65, 6, 10, 11, + 15, 7, 77, 66, 2, 66, 1, 71, 29, 66, 1, 29, 26, 35, 29, 24, + 94, 6, 6, 72, 67, 78, 69, 26, 0, 2, 29, 48, 62, 57, 37, 95, + 102, 8, 40, 41, 121, 71, 11, 95, 21, 68, 2, 29, 33, 54, 47, 38, + 103, 70, 76, 83, 81, 76, 15, 21, 13, 9, 14, 13, 5, 17, 12, 73, + 66, 7, 2, 1, 73, 1, 69, 74, 64, 65, 2, 67, 69, 101, 71, 72, + 69, 79, 81, 1, 12, 2, 65, 1, 64, 76, 66, 3, 96, 112, 88, 93, + 89, 123, 80, 74, 106, 4, 70, 70, 76, 78, 89, 83, 78, 97, 93, 88, + 91, 98, 71, 4, 32, 22, 10, 4, 11, 5, 0, 66, 4, 12, 39, 24, + 18, 10, 27, 12, 13, 0, 22, 5, 34, 21, 16, 10, 21, 8, 1, 64, + 66, 69, 45, 25, 10, 3, 16, 1, 71, 68, 13, 39, 32, 19, 9, 27, + 11, 8, 1, 0, 62, 76, 69, 1, 64, 70, 0, 3, 7, 6, 10, 15, + 25, 70, 67, 75, 75, 52, 80, 96, 74, 0, 0, 72, 71, 70, 73, 64, + 75, 91, 69, 68, 20, 70, 77, 68, 68, 66, 71, 70, 69, 86, 78, 71, + 74, 71, 19, 97, 75, 21, 80, 67, 69, 69, 3, 65, 
70, 1, 7, 72, + 72, 99, 37, 35, 36, 22, 8, 9, 7, 64, 5, 66, 69, 65, 81, 80, + 85, 86, 83, 93, 120, 84, 78, 79, 79, 91, 86, 84, 86, 88, 79, 67, + 71, 68, 75, 93, 82, 80, 100, 87, 98, 99, 117, 111, 109, 110, 72, 69, + 107, 78, 78, 85, 100, 91, 92, 97, 86, 92, 84, 86, 101, 106, 118, 81, + 93, 100, 71, 8, 12, 15, 30, 20, 26, 5, 28, 24, 33, 29, 37, 32, + 21, 45, 4, 67, 75, 83, 83, 108, 109, 110, 13, 43, 38, 40, 31, 32, + 19, 17, 9, 8, 71, 8, 12, 15, 30, 20, 26, 5, 28, 24, 33, 29, + 37, 32, 21, 45, 4, 67, 75, 83, 83, 108, 109, 110, 75, 69, 0, 10, + 10, 13, 38}, + + { + + 31, 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 84, 4, 30, 36, 9, + 29, 77, 75, 18, 68, 71, 31, 90, 87, 6, 68, 126, 126, 125, 12, 64, + 69, 75, 18, 68, 78, 9, 34, 64, 1, 67, 66, 102, 105, 93, 110, 65, + 73, 75, 74, 89, 82, 98, 1, 69, 70, 68, 30, 1, 22, 0, 0, 0, + 2, 95, 97, 8, 71, 69, 13, 66, 79, 22, 13, 2, 39, 37, 3, 13, + 10, 21, 3, 4, 24, 106, 87, 88, 89, 23, 70, 14, 24, 67, 77, 64, + 8, 65, 74, 72, 82, 22, 75, 11, 67, 76, 4, 70, 64, 6, 10, 12, + 16, 7, 77, 66, 2, 66, 1, 71, 29, 66, 1, 29, 26, 36, 30, 25, + 95, 7, 7, 73, 67, 78, 69, 27, 0, 2, 30, 49, 62, 59, 38, 96, + 103, 8, 40, 41, 121, 72, 11, 96, 21, 69, 3, 30, 33, 54, 48, 39, + 104, 69, 77, 86, 84, 74, 15, 21, 13, 8, 13, 12, 5, 16, 11, 73, + 66, 7, 1, 1, 74, 0, 70, 75, 64, 65, 2, 67, 70, 103, 71, 73, + 69, 80, 82, 65, 10, 0, 67, 64, 66, 79, 68, 1, 98, 114, 90, 95, + 91, 125, 81, 76, 108, 2, 71, 71, 77, 80, 91, 85, 80, 99, 93, 89, + 92, 99, 70, 5, 34, 23, 10, 4, 12, 5, 0, 65, 5, 13, 40, 25, + 19, 10, 28, 13, 14, 1, 24, 5, 34, 21, 16, 10, 22, 8, 2, 0, + 65, 69, 47, 26, 10, 3, 16, 2, 71, 68, 13, 40, 32, 20, 9, 28, + 11, 8, 2, 0, 62, 74, 67, 3, 1, 68, 1, 5, 8, 7, 12, 17, + 27, 70, 66, 75, 75, 55, 80, 97, 74, 0, 1, 72, 71, 70, 73, 64, + 75, 92, 69, 68, 20, 70, 77, 68, 68, 66, 71, 70, 69, 87, 79, 71, + 74, 71, 20, 98, 75, 22, 81, 67, 69, 69, 4, 65, 70, 2, 8, 72, + 72, 101, 36, 34, 35, 20, 6, 6, 5, 67, 3, 69, 72, 68, 84, 83, + 87, 89, 87, 97, 125, 86, 81, 81, 
81, 93, 88, 85, 87, 89, 79, 69, + 73, 70, 77, 95, 83, 82, 102, 89, 101, 101, 119, 113, 111, 111, 73, 71, + 109, 80, 80, 87, 102, 93, 94, 98, 87, 93, 85, 85, 102, 108, 120, 82, + 95, 101, 70, 8, 13, 15, 31, 21, 27, 5, 29, 25, 34, 29, 38, 33, + 22, 44, 2, 69, 77, 85, 85, 110, 111, 111, 14, 43, 38, 41, 31, 33, + 19, 18, 10, 8, 70, 8, 13, 15, 31, 21, 27, 5, 29, 25, 34, 29, + 38, 33, 22, 44, 2, 69, 77, 85, 85, 110, 111, 111, 75, 69, 0, 11, + 11, 14, 41}, + + { + + 30, 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 86, 3, 29, 37, 9, + 30, 79, 75, 20, 69, 71, 32, 91, 88, 5, 70, 126, 126, 126, 14, 0, + 69, 75, 20, 69, 78, 10, 34, 64, 1, 66, 64, 103, 106, 93, 110, 65, + 72, 74, 75, 89, 82, 98, 1, 69, 70, 67, 31, 1, 22, 0, 0, 0, + 2, 95, 97, 9, 72, 69, 12, 66, 78, 24, 14, 3, 40, 38, 4, 14, + 12, 22, 4, 5, 27, 106, 88, 88, 90, 23, 70, 15, 27, 66, 77, 64, + 10, 65, 74, 72, 82, 22, 75, 12, 66, 76, 5, 69, 64, 6, 11, 13, + 16, 7, 78, 66, 2, 66, 1, 71, 29, 66, 1, 30, 26, 36, 30, 25, + 95, 7, 7, 73, 67, 77, 69, 28, 1, 3, 32, 51, 62, 60, 40, 96, + 104, 9, 41, 41, 122, 72, 12, 96, 22, 69, 3, 31, 33, 54, 48, 41, + 105, 67, 78, 89, 87, 73, 15, 21, 12, 8, 13, 12, 4, 16, 11, 74, + 67, 7, 1, 1, 74, 0, 70, 76, 65, 65, 2, 68, 70, 104, 72, 74, + 69, 81, 82, 67, 8, 65, 68, 65, 68, 82, 71, 0, 100, 116, 91, 97, + 93, 126, 82, 77, 109, 1, 72, 72, 79, 81, 93, 87, 81, 100, 93, 90, + 93, 101, 68, 7, 35, 23, 10, 4, 13, 6, 1, 65, 7, 15, 41, 26, + 19, 11, 30, 14, 16, 2, 27, 6, 35, 22, 17, 11, 23, 9, 2, 1, + 64, 69, 48, 27, 10, 3, 17, 2, 71, 67, 14, 40, 33, 20, 9, 29, + 12, 9, 2, 1, 62, 73, 66, 4, 2, 67, 3, 6, 10, 9, 14, 19, + 30, 69, 65, 74, 74, 58, 79, 98, 73, 1, 2, 72, 71, 70, 73, 0, + 76, 93, 69, 68, 21, 70, 77, 68, 68, 65, 72, 71, 69, 88, 80, 72, + 74, 71, 21, 99, 76, 23, 81, 68, 69, 69, 4, 66, 71, 2, 8, 72, + 72, 102, 35, 33, 34, 19, 3, 4, 2, 70, 0, 72, 75, 71, 87, 85, + 89, 93, 90, 101, 126, 89, 83, 83, 83, 96, 90, 86, 88, 90, 79, 71, + 76, 73, 79, 97, 85, 83, 104, 91, 103, 103, 121, 115, 112, 113, 74, 
72, + 111, 81, 81, 88, 104, 94, 95, 99, 88, 94, 85, 84, 104, 109, 121, 84, + 96, 102, 70, 9, 13, 16, 32, 22, 27, 6, 30, 26, 35, 30, 39, 33, + 22, 44, 0, 71, 79, 87, 87, 113, 113, 112, 14, 44, 39, 41, 32, 34, + 20, 18, 10, 9, 70, 9, 13, 16, 32, 22, 27, 6, 30, 26, 35, 30, + 39, 33, 22, 44, 0, 71, 79, 87, 87, 113, 113, 112, 75, 69, 0, 11, + 11, 16, 43}, + + { + + 28, 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 88, 1, 28, 37, 9, + 32, 81, 75, 21, 69, 72, 33, 92, 90, 3, 72, 126, 126, 126, 16, 1, + 69, 75, 21, 69, 78, 10, 34, 65, 2, 65, 0, 104, 108, 94, 111, 65, + 72, 73, 75, 90, 82, 98, 1, 69, 70, 67, 31, 1, 22, 0, 0, 0, + 3, 96, 97, 9, 73, 69, 12, 65, 77, 26, 15, 3, 42, 40, 4, 15, + 14, 22, 5, 6, 29, 107, 88, 89, 90, 23, 71, 15, 29, 66, 76, 0, + 11, 65, 74, 72, 82, 22, 75, 12, 65, 76, 5, 69, 64, 6, 11, 14, + 17, 7, 78, 67, 2, 66, 0, 71, 29, 66, 1, 30, 26, 36, 30, 25, + 96, 8, 8, 74, 67, 77, 69, 29, 1, 3, 33, 52, 62, 62, 42, 97, + 105, 9, 41, 41, 123, 72, 12, 97, 22, 69, 3, 32, 33, 54, 48, 42, + 106, 66, 79, 92, 91, 72, 14, 21, 12, 7, 12, 11, 4, 16, 10, 75, + 68, 6, 1, 0, 75, 64, 71, 77, 65, 66, 1, 69, 71, 105, 73, 75, + 69, 83, 83, 69, 6, 67, 70, 67, 71, 86, 73, 64, 102, 119, 93, 100, + 95, 126, 84, 78, 111, 0, 73, 73, 80, 83, 95, 89, 83, 102, 93, 91, + 94, 103, 67, 8, 36, 24, 10, 4, 13, 7, 2, 64, 9, 16, 41, 26, + 20, 11, 31, 15, 17, 3, 29, 6, 35, 22, 17, 11, 23, 9, 3, 1, + 0, 69, 49, 28, 10, 3, 18, 3, 71, 66, 15, 40, 34, 20, 9, 30, + 12, 10, 3, 1, 62, 72, 64, 6, 4, 66, 4, 8, 11, 10, 15, 21, + 32, 69, 64, 74, 73, 61, 79, 99, 73, 2, 2, 72, 71, 70, 74, 0, + 76, 94, 69, 68, 21, 70, 77, 69, 68, 65, 72, 71, 70, 89, 81, 72, + 75, 71, 22, 100, 76, 23, 82, 68, 69, 70, 4, 66, 71, 2, 9, 72, + 72, 104, 34, 32, 33, 17, 1, 1, 64, 73, 65, 75, 79, 74, 91, 88, + 91, 97, 94, 105, 126, 92, 85, 85, 86, 98, 92, 88, 90, 91, 79, 74, + 78, 75, 81, 100, 87, 85, 107, 93, 105, 105, 123, 118, 114, 114, 76, 73, + 114, 83, 83, 90, 106, 96, 97, 100, 89, 95, 85, 83, 105, 111, 123, 85, + 97, 103, 70, 9, 14, 
16, 33, 22, 28, 6, 31, 26, 36, 30, 39, 34, + 23, 43, 65, 73, 81, 89, 89, 115, 115, 114, 15, 44, 39, 42, 32, 34, + 20, 19, 10, 9, 70, 9, 14, 16, 33, 22, 28, 6, 31, 26, 36, 30, + 39, 34, 23, 43, 65, 73, 81, 89, 89, 115, 115, 114, 75, 70, 64, 11, + 11, 17, 45}, + + { + + 27, 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 90, 64, 28, 38, 9, + 34, 82, 74, 23, 69, 72, 34, 92, 91, 2, 74, 126, 126, 126, 18, 3, + 68, 74, 23, 69, 77, 11, 35, 65, 3, 64, 1, 105, 109, 94, 111, 65, + 72, 72, 75, 90, 82, 98, 1, 68, 69, 66, 31, 1, 22, 0, 0, 0, + 4, 96, 97, 9, 74, 69, 12, 64, 75, 29, 16, 4, 44, 42, 5, 16, + 16, 23, 7, 7, 31, 108, 88, 89, 90, 24, 71, 16, 31, 66, 75, 1, + 13, 65, 73, 72, 81, 23, 75, 13, 64, 75, 6, 69, 64, 7, 12, 15, + 18, 7, 78, 67, 3, 65, 0, 71, 30, 66, 1, 30, 27, 37, 30, 26, + 96, 9, 9, 75, 67, 77, 69, 30, 1, 4, 34, 53, 62, 62, 44, 98, + 106, 10, 41, 42, 124, 72, 12, 97, 23, 69, 3, 34, 33, 54, 48, 43, + 107, 65, 80, 94, 94, 70, 14, 21, 12, 6, 12, 11, 4, 16, 10, 76, + 69, 6, 1, 0, 75, 64, 72, 77, 65, 66, 1, 70, 72, 106, 73, 75, + 69, 84, 83, 71, 4, 68, 71, 69, 73, 89, 75, 65, 104, 121, 94, 102, + 96, 126, 85, 79, 113, 64, 74, 74, 81, 85, 96, 91, 85, 103, 93, 91, + 95, 104, 65, 10, 38, 25, 10, 4, 14, 8, 3, 0, 11, 17, 42, 27, + 21, 12, 32, 16, 18, 4, 31, 7, 36, 23, 18, 11, 24, 10, 4, 2, + 2, 68, 51, 29, 11, 3, 19, 4, 70, 65, 16, 41, 35, 21, 10, 31, + 13, 11, 4, 2, 62, 70, 1, 8, 6, 65, 5, 10, 13, 12, 17, 23, + 34, 68, 0, 73, 72, 62, 79, 100, 73, 3, 3, 71, 71, 70, 74, 0, + 76, 95, 69, 68, 22, 70, 77, 69, 68, 65, 72, 71, 70, 89, 82, 72, + 75, 70, 24, 100, 76, 24, 83, 68, 69, 70, 5, 66, 71, 3, 10, 71, + 71, 106, 33, 31, 32, 15, 64, 65, 66, 76, 67, 77, 82, 76, 94, 91, + 93, 101, 97, 108, 126, 95, 87, 86, 88, 100, 93, 89, 91, 92, 79, 76, + 80, 77, 83, 102, 89, 86, 109, 95, 107, 107, 125, 120, 116, 115, 77, 74, + 116, 84, 85, 91, 108, 97, 98, 101, 90, 95, 85, 82, 106, 112, 125, 86, + 98, 104, 70, 10, 15, 17, 35, 23, 29, 6, 32, 27, 37, 31, 40, 35, + 24, 42, 66, 75, 83, 91, 91, 117, 
117, 115, 16, 44, 40, 43, 33, 35, + 21, 20, 11, 10, 70, 10, 15, 17, 35, 23, 29, 6, 32, 27, 37, 31, + 40, 35, 24, 42, 66, 75, 83, 91, 91, 117, 117, 115, 75, 70, 64, 12, + 12, 18, 47}, + + { + + 26, 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 92, 65, 27, 39, 9, + 35, 84, 74, 24, 70, 72, 35, 93, 92, 1, 76, 126, 126, 126, 20, 4, + 68, 74, 24, 70, 77, 12, 35, 65, 3, 0, 3, 106, 110, 94, 111, 65, + 71, 71, 76, 90, 82, 98, 1, 68, 69, 65, 32, 1, 22, 0, 0, 0, + 4, 96, 97, 10, 75, 69, 11, 64, 74, 31, 17, 5, 45, 43, 6, 17, + 18, 24, 8, 8, 34, 108, 89, 89, 91, 24, 71, 17, 34, 65, 75, 1, + 15, 65, 73, 72, 81, 23, 75, 13, 0, 75, 7, 68, 64, 7, 12, 16, + 18, 7, 79, 67, 3, 65, 0, 71, 30, 66, 1, 31, 27, 37, 30, 26, + 97, 9, 9, 75, 67, 76, 69, 31, 2, 4, 36, 55, 62, 62, 46, 98, + 107, 10, 42, 42, 125, 72, 13, 98, 24, 69, 3, 35, 33, 54, 48, 45, + 108, 0, 81, 97, 97, 69, 14, 21, 11, 6, 11, 11, 3, 16, 9, 77, + 70, 6, 1, 0, 75, 65, 72, 78, 66, 66, 1, 71, 72, 107, 74, 76, + 69, 85, 84, 73, 2, 70, 73, 70, 75, 92, 78, 66, 106, 123, 96, 104, + 98, 126, 86, 80, 114, 65, 75, 75, 83, 86, 98, 93, 86, 105, 93, 92, + 96, 106, 64, 11, 39, 25, 10, 4, 15, 9, 4, 0, 13, 19, 43, 28, + 21, 13, 34, 17, 20, 5, 34, 8, 36, 23, 18, 12, 25, 11, 4, 3, + 3, 68, 52, 30, 11, 3, 20, 4, 70, 64, 17, 41, 36, 21, 10, 32, + 14, 12, 4, 3, 62, 69, 2, 9, 7, 64, 7, 11, 15, 13, 19, 25, + 37, 67, 1, 72, 71, 62, 78, 101, 72, 4, 4, 71, 71, 70, 74, 1, + 77, 96, 69, 68, 23, 70, 77, 69, 68, 64, 73, 72, 70, 90, 83, 73, + 75, 70, 25, 101, 77, 25, 83, 69, 69, 70, 5, 67, 72, 3, 10, 71, + 71, 107, 32, 30, 31, 14, 67, 67, 69, 79, 70, 80, 85, 79, 97, 93, + 95, 105, 101, 112, 126, 98, 89, 88, 90, 103, 95, 90, 92, 93, 79, 78, + 83, 80, 85, 104, 91, 88, 111, 97, 109, 109, 126, 122, 117, 117, 78, 75, + 118, 86, 86, 93, 110, 98, 99, 102, 91, 96, 85, 81, 108, 113, 126, 88, + 99, 105, 70, 10, 15, 18, 36, 24, 29, 7, 33, 28, 38, 31, 41, 35, + 24, 42, 68, 77, 85, 93, 93, 120, 119, 116, 16, 45, 40, 43, 34, 36, + 21, 20, 11, 11, 70, 10, 15, 18, 36, 24, 29, 
7, 33, 28, 38, 31, + 41, 35, 24, 42, 68, 77, 85, 93, 93, 120, 119, 116, 75, 70, 64, 12, + 12, 20, 49}, + + { + + 25, 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 93, 67, 26, 39, 10, + 37, 85, 74, 26, 70, 73, 36, 94, 93, 0, 77, 126, 126, 126, 22, 6, + 68, 74, 26, 70, 76, 13, 36, 65, 4, 1, 4, 108, 111, 94, 112, 65, + 71, 71, 76, 91, 81, 98, 0, 68, 69, 64, 32, 1, 22, 0, 0, 0, + 5, 96, 97, 10, 75, 70, 11, 0, 73, 33, 18, 6, 47, 45, 7, 18, + 20, 25, 9, 8, 36, 109, 89, 89, 91, 25, 71, 18, 36, 65, 74, 2, + 17, 65, 73, 72, 81, 24, 76, 14, 0, 75, 8, 68, 0, 7, 13, 17, + 19, 7, 79, 67, 3, 65, 0, 71, 30, 66, 1, 31, 27, 38, 31, 27, + 97, 10, 10, 76, 67, 76, 69, 32, 2, 5, 37, 56, 62, 62, 47, 99, + 108, 11, 42, 42, 125, 73, 13, 98, 24, 70, 4, 36, 33, 54, 49, 46, + 109, 1, 82, 100, 100, 67, 14, 21, 11, 5, 11, 10, 3, 15, 9, 77, + 70, 6, 0, 0, 76, 65, 73, 79, 66, 66, 1, 71, 73, 109, 74, 77, + 69, 86, 84, 76, 0, 72, 74, 72, 77, 95, 80, 68, 108, 125, 97, 106, + 100, 126, 87, 82, 116, 67, 76, 76, 84, 88, 100, 95, 88, 106, 93, 93, + 97, 107, 1, 13, 41, 26, 10, 4, 16, 9, 4, 1, 14, 20, 44, 29, + 22, 13, 35, 18, 21, 6, 36, 8, 37, 24, 19, 12, 26, 11, 5, 4, + 4, 68, 54, 31, 11, 3, 20, 5, 70, 64, 17, 42, 36, 22, 10, 33, + 14, 12, 5, 3, 62, 67, 4, 11, 9, 1, 8, 13, 16, 15, 21, 27, + 39, 67, 2, 72, 71, 62, 78, 102, 72, 4, 5, 71, 71, 70, 74, 1, + 77, 97, 69, 68, 23, 70, 77, 69, 68, 64, 73, 72, 70, 91, 84, 73, + 75, 70, 26, 102, 77, 26, 84, 69, 69, 70, 6, 67, 72, 4, 11, 71, + 71, 109, 31, 29, 30, 12, 69, 70, 71, 82, 72, 83, 88, 82, 100, 96, + 97, 108, 104, 116, 126, 100, 92, 90, 92, 105, 97, 91, 93, 94, 79, 80, + 85, 82, 87, 106, 92, 89, 113, 99, 112, 111, 126, 124, 119, 118, 79, 77, + 120, 87, 88, 94, 112, 100, 101, 103, 92, 97, 86, 80, 109, 115, 126, 89, + 101, 106, 69, 11, 16, 18, 37, 25, 30, 7, 34, 29, 39, 32, 42, 36, + 25, 41, 70, 79, 87, 95, 95, 122, 121, 117, 17, 45, 41, 44, 34, 37, + 22, 21, 12, 11, 69, 11, 16, 18, 37, 25, 30, 7, 34, 29, 39, 32, + 42, 36, 25, 41, 70, 79, 87, 95, 95, 122, 121, 117, 
75, 70, 64, 13, + 13, 21, 52}, + + { + + 23, 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 95, 68, 25, 40, 10, + 39, 87, 74, 27, 70, 73, 37, 95, 95, 65, 79, 126, 126, 126, 24, 7, + 68, 74, 27, 70, 76, 14, 36, 66, 5, 2, 5, 109, 113, 95, 112, 65, + 71, 70, 76, 91, 81, 98, 0, 68, 69, 64, 33, 1, 22, 0, 0, 0, + 5, 97, 97, 11, 76, 70, 10, 0, 72, 35, 19, 7, 48, 46, 7, 19, + 22, 26, 10, 9, 38, 110, 89, 89, 91, 25, 72, 19, 38, 65, 73, 3, + 19, 65, 73, 72, 81, 24, 76, 14, 1, 75, 8, 68, 0, 7, 13, 18, + 19, 7, 79, 68, 3, 65, 0, 71, 30, 66, 1, 32, 27, 38, 31, 27, + 98, 10, 11, 76, 67, 76, 69, 33, 2, 5, 38, 57, 62, 62, 49, 99, + 109, 11, 42, 42, 126, 73, 14, 99, 25, 70, 4, 37, 33, 54, 49, 47, + 110, 3, 83, 103, 103, 66, 13, 21, 10, 4, 10, 10, 2, 15, 8, 78, + 71, 5, 0, 0, 76, 66, 74, 80, 67, 67, 0, 72, 73, 110, 75, 78, + 69, 88, 85, 78, 65, 74, 76, 74, 79, 99, 83, 69, 110, 126, 99, 108, + 102, 126, 89, 83, 118, 68, 77, 77, 85, 89, 102, 97, 90, 108, 93, 94, + 98, 109, 2, 14, 42, 26, 10, 4, 17, 10, 5, 2, 16, 21, 45, 29, + 23, 14, 36, 19, 23, 7, 38, 9, 37, 24, 19, 13, 27, 12, 5, 4, + 5, 68, 55, 32, 11, 3, 21, 5, 70, 0, 18, 42, 37, 22, 10, 34, + 15, 13, 5, 4, 62, 66, 5, 12, 11, 2, 10, 14, 18, 16, 22, 29, + 41, 66, 3, 71, 70, 62, 78, 103, 72, 5, 5, 71, 71, 70, 75, 2, + 77, 98, 69, 68, 24, 70, 77, 69, 68, 64, 74, 72, 70, 92, 85, 73, + 75, 70, 27, 103, 77, 26, 85, 69, 69, 70, 6, 67, 73, 4, 12, 71, + 71, 110, 30, 28, 29, 10, 71, 72, 74, 85, 75, 86, 92, 85, 104, 99, + 99, 112, 108, 120, 126, 103, 94, 92, 94, 108, 99, 93, 94, 95, 79, 83, + 87, 84, 89, 108, 94, 91, 115, 101, 114, 113, 126, 126, 121, 119, 80, 78, + 123, 89, 90, 96, 114, 101, 102, 104, 93, 98, 86, 79, 110, 116, 126, 90, + 102, 107, 69, 11, 16, 19, 38, 25, 31, 7, 35, 30, 40, 32, 43, 36, + 26, 41, 72, 81, 89, 97, 97, 124, 123, 119, 17, 46, 41, 45, 35, 37, + 22, 21, 12, 12, 69, 11, 16, 19, 38, 25, 31, 7, 35, 30, 40, 32, + 43, 36, 26, 41, 72, 81, 89, 97, 97, 124, 123, 119, 75, 70, 64, 13, + 13, 23, 54}, + + { + + 22, 4, 82, 22, 4, 82, 
15, 24, 27, 12, 70, 97, 70, 24, 41, 10, + 40, 89, 73, 29, 71, 73, 38, 96, 96, 66, 81, 126, 126, 126, 26, 8, + 68, 73, 29, 71, 76, 15, 36, 66, 5, 3, 7, 110, 114, 95, 112, 65, + 70, 69, 77, 91, 81, 98, 0, 67, 69, 0, 33, 1, 22, 0, 0, 0, + 6, 97, 97, 11, 77, 70, 10, 1, 70, 38, 20, 8, 50, 48, 8, 20, + 24, 27, 11, 10, 41, 110, 90, 89, 92, 25, 72, 20, 41, 64, 73, 3, + 21, 65, 73, 72, 80, 24, 76, 15, 2, 74, 9, 67, 0, 8, 14, 19, + 20, 7, 80, 68, 4, 65, 0, 71, 31, 66, 1, 32, 28, 38, 31, 27, + 98, 11, 11, 77, 67, 75, 69, 34, 3, 6, 40, 59, 62, 62, 51, 100, + 110, 12, 43, 42, 126, 73, 14, 99, 26, 70, 4, 39, 33, 54, 49, 49, + 111, 4, 84, 105, 106, 65, 13, 21, 10, 4, 10, 10, 2, 15, 8, 79, + 72, 5, 0, 0, 76, 66, 74, 81, 67, 67, 0, 73, 74, 111, 76, 78, + 69, 89, 85, 80, 67, 75, 77, 75, 81, 102, 85, 70, 112, 126, 100, 110, + 104, 126, 90, 84, 119, 69, 78, 78, 87, 91, 104, 99, 91, 109, 93, 95, + 99, 111, 4, 16, 43, 27, 10, 4, 18, 11, 6, 2, 18, 23, 46, 30, + 23, 15, 38, 20, 24, 8, 41, 10, 38, 25, 20, 13, 28, 13, 6, 5, + 6, 67, 56, 33, 11, 3, 22, 6, 69, 1, 19, 42, 38, 22, 10, 35, + 16, 14, 6, 5, 62, 65, 7, 14, 12, 3, 11, 16, 20, 18, 24, 31, + 44, 65, 4, 70, 69, 62, 77, 104, 71, 6, 6, 71, 71, 70, 75, 2, + 78, 99, 69, 68, 25, 70, 77, 69, 68, 0, 74, 73, 70, 93, 86, 74, + 75, 70, 28, 103, 78, 27, 85, 70, 69, 70, 6, 68, 73, 4, 12, 71, + 71, 112, 29, 27, 28, 9, 74, 75, 77, 88, 77, 89, 95, 88, 107, 101, + 101, 116, 111, 124, 126, 106, 96, 93, 96, 110, 101, 94, 95, 96, 79, 85, + 90, 87, 91, 110, 96, 92, 117, 103, 116, 115, 126, 126, 122, 121, 81, 79, + 125, 90, 91, 97, 116, 102, 103, 105, 94, 99, 86, 78, 112, 117, 126, 92, + 103, 108, 69, 12, 17, 20, 39, 26, 31, 8, 36, 31, 41, 33, 44, 37, + 26, 40, 74, 83, 91, 99, 99, 126, 125, 120, 18, 46, 42, 45, 36, 38, + 23, 22, 12, 13, 69, 12, 17, 20, 39, 26, 31, 8, 36, 31, 41, 33, + 44, 37, 26, 40, 74, 83, 91, 99, 99, 126, 125, 120, 75, 70, 64, 13, + 13, 24, 56}, + + { + + 21, 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 99, 71, 23, 41, 10, + 42, 90, 73, 30, 
71, 74, 39, 97, 97, 67, 83, 126, 126, 126, 28, 10, + 68, 73, 30, 71, 75, 16, 37, 66, 6, 4, 8, 111, 115, 95, 113, 65, + 70, 68, 77, 92, 81, 98, 0, 67, 69, 1, 34, 1, 22, 0, 0, 0, + 6, 97, 97, 12, 78, 70, 9, 1, 69, 40, 21, 9, 51, 49, 9, 21, + 26, 28, 12, 11, 43, 111, 90, 89, 92, 26, 72, 21, 43, 64, 72, 4, + 23, 65, 73, 72, 80, 25, 76, 15, 3, 74, 10, 67, 0, 8, 14, 20, + 20, 7, 80, 68, 4, 65, 0, 71, 31, 66, 1, 33, 28, 39, 31, 28, + 99, 11, 12, 77, 67, 75, 69, 35, 3, 6, 41, 60, 62, 62, 53, 100, + 111, 12, 43, 42, 126, 73, 15, 100, 26, 70, 4, 40, 33, 54, 49, 50, + 112, 6, 85, 108, 109, 0, 13, 21, 9, 3, 9, 9, 1, 15, 7, 80, + 73, 5, 0, 0, 77, 67, 75, 82, 68, 67, 0, 74, 74, 112, 76, 79, + 69, 90, 86, 82, 69, 77, 79, 77, 83, 105, 88, 71, 114, 126, 102, 112, + 106, 126, 91, 85, 121, 70, 79, 79, 88, 92, 106, 101, 93, 111, 93, 96, + 100, 112, 5, 17, 45, 27, 10, 4, 19, 12, 7, 3, 20, 24, 47, 31, + 24, 15, 39, 21, 26, 9, 43, 10, 38, 25, 20, 14, 29, 13, 6, 6, + 7, 67, 58, 34, 11, 3, 23, 6, 69, 2, 20, 43, 39, 23, 10, 36, + 16, 15, 6, 5, 62, 0, 8, 15, 14, 4, 13, 17, 21, 19, 26, 33, + 46, 65, 5, 70, 68, 62, 77, 105, 71, 7, 7, 71, 71, 70, 75, 3, + 78, 100, 69, 68, 25, 70, 77, 69, 68, 0, 75, 73, 70, 94, 87, 74, + 75, 70, 29, 104, 78, 28, 86, 70, 69, 70, 7, 68, 74, 5, 13, 71, + 71, 113, 28, 26, 27, 7, 76, 77, 79, 91, 80, 92, 98, 91, 110, 104, + 103, 120, 115, 126, 126, 109, 98, 95, 98, 113, 103, 95, 96, 97, 79, 87, + 92, 89, 93, 112, 98, 94, 119, 105, 118, 117, 126, 126, 124, 122, 82, 80, + 126, 92, 93, 99, 118, 104, 105, 106, 95, 100, 86, 77, 113, 119, 126, 93, + 104, 109, 69, 12, 17, 20, 40, 27, 32, 8, 37, 32, 42, 33, 45, 37, + 27, 40, 76, 85, 93, 101, 101, 126, 126, 121, 18, 47, 42, 46, 36, 39, + 23, 22, 13, 13, 69, 12, 17, 20, 40, 27, 32, 8, 37, 32, 42, 33, + 45, 37, 27, 40, 76, 85, 93, 101, 101, 126, 126, 121, 75, 70, 64, 14, + 14, 26, 58}, + + { + + 20, 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 101, 73, 22, 42, 10, + 44, 92, 73, 32, 71, 74, 40, 98, 98, 68, 85, 126, 126, 126, 30, 11, + 
68, 73, 32, 71, 75, 17, 37, 66, 7, 5, 9, 112, 116, 95, 113, 65, + 70, 67, 77, 92, 81, 98, 0, 67, 69, 2, 34, 1, 22, 0, 0, 0, + 7, 97, 97, 12, 79, 70, 9, 2, 68, 42, 22, 10, 53, 51, 10, 22, + 28, 29, 13, 12, 45, 112, 90, 89, 92, 26, 72, 22, 45, 64, 71, 5, + 25, 65, 73, 72, 80, 25, 76, 16, 4, 74, 11, 67, 0, 8, 15, 21, + 21, 7, 80, 68, 4, 65, 0, 71, 31, 66, 1, 33, 28, 39, 31, 28, + 99, 12, 13, 78, 67, 75, 69, 36, 3, 7, 42, 61, 62, 62, 55, 101, + 112, 13, 43, 42, 126, 73, 15, 100, 27, 70, 4, 41, 33, 54, 49, 51, + 113, 7, 86, 111, 112, 1, 13, 21, 9, 2, 9, 9, 1, 15, 7, 81, + 74, 5, 0, 0, 77, 67, 76, 83, 68, 67, 0, 75, 75, 113, 77, 80, + 69, 91, 86, 84, 71, 79, 80, 79, 85, 108, 90, 72, 116, 126, 103, 114, + 108, 126, 92, 86, 123, 71, 80, 80, 89, 94, 108, 103, 95, 112, 93, 97, + 101, 114, 7, 19, 46, 28, 10, 4, 20, 13, 8, 4, 22, 25, 48, 32, + 25, 16, 40, 22, 27, 10, 45, 11, 39, 26, 21, 14, 30, 14, 7, 7, + 8, 67, 59, 35, 11, 3, 24, 7, 69, 3, 21, 43, 40, 23, 10, 37, + 17, 16, 7, 6, 62, 1, 10, 17, 16, 5, 14, 19, 23, 21, 28, 35, + 48, 64, 6, 69, 67, 62, 77, 106, 71, 8, 8, 71, 71, 70, 75, 3, + 78, 101, 69, 68, 26, 70, 77, 69, 68, 0, 75, 73, 70, 95, 88, 74, + 75, 70, 30, 105, 78, 29, 87, 70, 69, 70, 7, 68, 74, 5, 14, 71, + 71, 115, 27, 25, 26, 5, 78, 80, 82, 94, 82, 95, 101, 94, 113, 107, + 105, 124, 118, 126, 126, 112, 100, 97, 100, 115, 105, 96, 97, 98, 79, 89, + 94, 91, 95, 114, 100, 95, 121, 107, 120, 119, 126, 126, 126, 123, 83, 81, + 126, 93, 95, 100, 120, 105, 106, 107, 96, 101, 86, 76, 114, 120, 126, 94, + 105, 110, 69, 13, 18, 21, 41, 28, 33, 8, 38, 33, 43, 34, 46, 38, + 28, 39, 78, 87, 95, 103, 103, 126, 126, 122, 19, 47, 43, 47, 37, 40, + 24, 23, 13, 14, 69, 13, 18, 21, 41, 28, 33, 8, 38, 33, 43, 34, + 46, 38, 28, 39, 78, 87, 95, 103, 103, 126, 126, 122, 75, 70, 64, 14, + 14, 27, 60}, + + { + + 18, 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 103, 75, 21, 42, 10, + 45, 94, 73, 33, 72, 75, 41, 99, 100, 70, 87, 126, 126, 126, 32, 12, + 68, 73, 33, 72, 75, 17, 37, 67, 7, 5, 10, 
114, 118, 96, 114, 66, + 70, 67, 78, 93, 81, 98, 64, 67, 69, 2, 34, 0, 22, 0, 0, 0, + 7, 98, 97, 12, 80, 71, 8, 2, 67, 44, 23, 10, 54, 52, 10, 23, + 29, 29, 14, 12, 47, 113, 91, 90, 93, 26, 73, 22, 47, 64, 71, 5, + 26, 65, 73, 72, 80, 25, 77, 16, 4, 74, 11, 67, 0, 8, 15, 22, + 21, 7, 81, 69, 4, 65, 64, 71, 31, 66, 1, 33, 28, 39, 31, 28, + 100, 12, 13, 79, 67, 75, 70, 36, 3, 7, 43, 62, 62, 62, 56, 102, + 114, 13, 43, 42, 126, 74, 15, 101, 27, 71, 4, 42, 33, 53, 49, 52, + 114, 8, 88, 114, 116, 2, 12, 21, 8, 1, 8, 8, 0, 14, 6, 82, + 75, 4, 64, 64, 78, 68, 77, 84, 69, 68, 64, 76, 76, 115, 78, 81, + 69, 93, 87, 87, 74, 81, 82, 81, 88, 112, 93, 74, 118, 126, 105, 117, + 110, 126, 94, 88, 125, 73, 81, 81, 91, 96, 110, 105, 97, 114, 93, 98, + 102, 116, 8, 20, 47, 28, 10, 4, 20, 13, 8, 4, 23, 26, 48, 32, + 25, 16, 41, 23, 28, 10, 47, 11, 39, 26, 21, 14, 30, 14, 7, 7, + 9, 67, 60, 36, 11, 2, 24, 7, 69, 3, 21, 43, 40, 23, 10, 38, + 17, 16, 7, 6, 62, 2, 11, 18, 17, 6, 15, 20, 24, 22, 29, 36, + 50, 64, 6, 69, 67, 62, 77, 108, 71, 8, 8, 71, 71, 70, 76, 3, + 79, 102, 70, 68, 26, 71, 77, 70, 68, 0, 76, 74, 71, 96, 89, 75, + 76, 70, 31, 106, 79, 29, 88, 71, 69, 71, 7, 69, 75, 5, 14, 71, + 71, 117, 25, 24, 24, 3, 81, 83, 85, 97, 85, 98, 105, 97, 117, 110, + 107, 126, 122, 126, 126, 115, 103, 99, 103, 118, 107, 98, 99, 99, 79, 92, + 97, 94, 97, 117, 102, 97, 124, 109, 123, 121, 126, 126, 126, 125, 85, 83, + 126, 95, 97, 102, 122, 107, 108, 108, 97, 102, 87, 75, 116, 122, 126, 96, + 107, 112, 69, 13, 18, 21, 42, 28, 33, 8, 39, 33, 44, 34, 46, 38, + 28, 38, 80, 89, 98, 106, 105, 126, 126, 124, 19, 47, 43, 47, 37, 40, + 24, 23, 13, 14, 69, 13, 18, 21, 42, 28, 33, 8, 39, 33, 44, 34, + 46, 38, 28, 38, 80, 89, 98, 106, 105, 126, 126, 124, 75, 71, 65, 14, + 14, 28, 62}, + + { + + 17, 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 104, 76, 21, 43, 11, + 47, 95, 72, 35, 72, 75, 43, 99, 101, 71, 88, 126, 126, 126, 34, 14, + 67, 72, 35, 72, 74, 18, 38, 67, 8, 6, 12, 115, 119, 96, 114, 66, + 69, 66, 78, 
93, 80, 97, 64, 66, 68, 3, 35, 0, 22, 0, 0, 0, + 8, 98, 97, 13, 80, 71, 8, 3, 65, 47, 25, 11, 56, 54, 11, 25, + 31, 30, 16, 13, 50, 113, 91, 90, 93, 27, 73, 23, 50, 0, 70, 6, + 28, 65, 72, 72, 79, 26, 77, 17, 5, 73, 12, 66, 1, 9, 16, 23, + 22, 8, 81, 69, 5, 64, 64, 70, 32, 65, 1, 34, 29, 40, 32, 29, + 100, 13, 14, 79, 67, 74, 70, 37, 4, 8, 45, 62, 62, 62, 58, 102, + 115, 14, 44, 43, 126, 74, 16, 101, 28, 71, 5, 44, 33, 53, 50, 54, + 115, 10, 89, 116, 119, 4, 12, 21, 8, 1, 8, 8, 0, 14, 6, 82, + 75, 4, 64, 64, 78, 68, 77, 84, 69, 68, 64, 76, 76, 116, 78, 81, + 69, 94, 87, 89, 76, 82, 83, 82, 90, 115, 95, 75, 119, 126, 106, 119, + 111, 126, 95, 89, 126, 74, 81, 81, 92, 97, 111, 106, 98, 115, 93, 98, + 102, 117, 10, 22, 49, 29, 10, 4, 21, 14, 9, 5, 25, 28, 49, 33, + 26, 17, 43, 24, 30, 11, 50, 12, 40, 27, 22, 15, 31, 15, 8, 8, + 11, 66, 62, 37, 12, 2, 25, 8, 68, 4, 22, 44, 41, 24, 11, 39, + 18, 17, 8, 7, 62, 4, 13, 20, 19, 8, 17, 22, 26, 24, 31, 38, + 53, 0, 7, 68, 66, 62, 76, 109, 70, 9, 9, 70, 71, 69, 76, 4, + 79, 102, 70, 68, 27, 71, 77, 70, 68, 1, 76, 74, 71, 96, 89, 75, + 76, 69, 33, 106, 79, 30, 88, 71, 69, 71, 8, 69, 75, 6, 15, 70, + 70, 118, 24, 23, 23, 2, 83, 85, 87, 100, 87, 100, 108, 99, 120, 112, + 109, 126, 125, 126, 126, 117, 105, 100, 105, 120, 108, 99, 100, 99, 79, 94, + 99, 96, 99, 119, 103, 98, 126, 110, 125, 122, 126, 126, 126, 126, 86, 84, + 126, 96, 98, 103, 123, 108, 109, 109, 97, 102, 87, 74, 117, 123, 126, 97, + 108, 113, 68, 14, 19, 22, 44, 29, 34, 9, 41, 34, 45, 35, 47, 39, + 29, 38, 81, 90, 100, 108, 106, 126, 126, 125, 20, 48, 44, 48, 38, 41, + 25, 24, 14, 15, 68, 14, 19, 22, 44, 29, 34, 9, 41, 34, 45, 35, + 47, 39, 29, 38, 81, 90, 100, 108, 106, 126, 126, 125, 75, 71, 65, 15, + 15, 30, 62}, + + { + + 16, 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 106, 78, 20, 44, 11, + 49, 97, 72, 36, 72, 75, 44, 100, 102, 72, 90, 126, 126, 126, 36, 15, + 67, 72, 36, 72, 74, 19, 38, 67, 9, 7, 13, 116, 120, 96, 114, 66, + 69, 65, 78, 93, 80, 97, 64, 66, 68, 4, 
35, 0, 22, 0, 0, 0, + 9, 98, 97, 13, 81, 71, 8, 4, 64, 49, 26, 12, 58, 56, 12, 26, + 33, 31, 17, 14, 52, 114, 91, 90, 93, 27, 73, 24, 52, 0, 69, 7, + 30, 65, 72, 72, 79, 26, 77, 17, 6, 73, 13, 66, 1, 9, 16, 24, + 23, 8, 81, 69, 5, 64, 64, 70, 32, 65, 1, 34, 29, 40, 32, 29, + 101, 14, 15, 80, 67, 74, 70, 38, 4, 8, 46, 62, 62, 62, 60, 103, + 116, 14, 44, 43, 126, 74, 16, 102, 29, 71, 5, 45, 33, 53, 50, 55, + 116, 11, 90, 119, 122, 5, 12, 21, 8, 0, 7, 8, 0, 14, 5, 83, + 76, 4, 64, 64, 78, 69, 78, 85, 69, 68, 64, 77, 77, 117, 79, 82, + 69, 95, 88, 91, 78, 84, 85, 84, 92, 118, 97, 76, 121, 126, 108, 121, + 113, 126, 96, 90, 126, 75, 82, 82, 93, 99, 113, 108, 100, 117, 93, 99, + 103, 119, 11, 23, 50, 30, 10, 4, 22, 15, 10, 6, 27, 29, 50, 34, + 27, 18, 44, 25, 31, 12, 52, 13, 40, 27, 22, 15, 32, 16, 9, 9, + 12, 66, 62, 38, 12, 2, 26, 9, 68, 5, 23, 44, 42, 24, 11, 40, + 19, 18, 9, 8, 62, 5, 15, 22, 21, 9, 18, 24, 28, 25, 33, 40, + 55, 1, 8, 67, 65, 62, 76, 110, 70, 10, 10, 70, 71, 69, 76, 4, + 79, 103, 70, 68, 28, 71, 77, 70, 68, 1, 76, 74, 71, 97, 90, 75, + 76, 69, 34, 107, 79, 31, 89, 71, 69, 71, 8, 69, 75, 6, 16, 70, + 70, 120, 23, 22, 22, 0, 85, 88, 90, 103, 89, 103, 111, 102, 123, 115, + 111, 126, 126, 126, 126, 120, 107, 102, 107, 122, 110, 100, 101, 100, 79, 96, + 101, 98, 101, 121, 105, 100, 126, 112, 126, 124, 126, 126, 126, 126, 87, 85, + 126, 98, 100, 105, 125, 109, 110, 110, 98, 103, 87, 73, 118, 124, 126, 98, + 109, 114, 68, 14, 20, 23, 45, 30, 35, 9, 42, 35, 46, 35, 48, 40, + 30, 37, 83, 92, 102, 110, 108, 126, 126, 126, 21, 48, 44, 49, 39, 42, + 25, 25, 14, 16, 68, 14, 20, 23, 45, 30, 35, 9, 42, 35, 46, 35, + 48, 40, 30, 37, 83, 92, 102, 110, 108, 126, 126, 126, 75, 71, 65, 15, + 15, 31, 62}, + + { + + 15, 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 108, 79, 19, 44, 11, + 51, 98, 72, 38, 72, 76, 45, 101, 103, 73, 92, 126, 126, 126, 38, 17, + 67, 72, 38, 72, 73, 20, 39, 67, 10, 8, 14, 117, 121, 96, 115, 66, + 69, 64, 78, 94, 80, 97, 64, 66, 68, 5, 36, 0, 22, 0, 0, 
0, + 9, 98, 97, 14, 82, 71, 7, 4, 0, 51, 27, 13, 59, 57, 13, 27, + 35, 32, 18, 15, 54, 115, 91, 90, 93, 28, 73, 25, 54, 0, 68, 8, + 32, 65, 72, 72, 79, 27, 77, 18, 7, 73, 14, 66, 1, 9, 17, 25, + 23, 8, 81, 69, 5, 64, 64, 70, 32, 65, 1, 35, 29, 41, 32, 30, + 101, 14, 16, 80, 67, 74, 70, 39, 4, 9, 47, 62, 62, 62, 62, 103, + 117, 15, 44, 43, 126, 74, 17, 102, 29, 71, 5, 46, 33, 53, 50, 56, + 117, 13, 91, 122, 125, 7, 12, 21, 7, 64, 7, 7, 64, 14, 5, 84, + 77, 4, 64, 64, 79, 69, 79, 86, 70, 68, 64, 78, 77, 118, 79, 83, + 69, 96, 88, 93, 80, 86, 86, 86, 94, 121, 100, 77, 123, 126, 109, 123, + 115, 126, 97, 91, 126, 76, 83, 83, 94, 100, 115, 110, 102, 118, 93, 100, + 104, 120, 13, 25, 52, 30, 10, 4, 23, 16, 11, 7, 29, 30, 51, 35, + 28, 18, 45, 26, 33, 13, 54, 13, 41, 28, 23, 16, 33, 16, 9, 10, + 13, 66, 62, 39, 12, 2, 27, 9, 68, 6, 24, 45, 43, 25, 11, 41, + 19, 19, 9, 8, 62, 7, 16, 23, 23, 10, 20, 25, 29, 27, 35, 42, + 57, 1, 9, 67, 64, 62, 76, 111, 70, 11, 11, 70, 71, 69, 76, 5, + 79, 104, 70, 68, 28, 71, 77, 70, 68, 1, 77, 74, 71, 98, 91, 75, + 76, 69, 35, 108, 79, 32, 90, 71, 69, 71, 9, 69, 76, 7, 17, 70, + 70, 121, 22, 21, 21, 65, 87, 90, 92, 106, 92, 106, 114, 105, 126, 118, + 113, 126, 126, 126, 126, 123, 109, 104, 109, 125, 112, 101, 102, 101, 79, 98, + 103, 100, 103, 123, 107, 101, 126, 114, 126, 126, 126, 126, 126, 126, 88, 86, + 126, 99, 102, 106, 126, 111, 112, 111, 99, 104, 87, 72, 119, 126, 126, 99, + 110, 115, 68, 15, 20, 23, 46, 31, 36, 9, 43, 36, 47, 36, 49, 40, + 31, 37, 85, 94, 104, 112, 110, 126, 126, 126, 21, 49, 45, 50, 39, 43, + 26, 25, 15, 16, 68, 15, 20, 23, 46, 31, 36, 9, 43, 36, 47, 36, + 49, 40, 31, 37, 85, 94, 104, 112, 110, 126, 126, 126, 75, 71, 65, 16, + 16, 33, 62}, + + }, + + { + + { + + 62, 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 30, 61, 62, 54, 14, + 118, 6, 78, 65, 1, 14, 73, 13, 64, 20, 62, 67, 90, 104, 126, 104, + 67, 78, 65, 1, 86, 95, 2, 18, 69, 81, 96, 8, 67, 86, 88, 5, + 76, 94, 9, 69, 81, 88, 67, 74, 74, 80, 72, 5, 22, 0, 0, 0, + 
83, 86, 97, 72, 22, 1, 52, 8, 69, 126, 102, 82, 74, 107, 126, 126, + 126, 95, 126, 114, 126, 123, 115, 122, 115, 0, 68, 84, 104, 70, 93, 90, + 126, 74, 97, 91, 126, 7, 82, 76, 125, 93, 87, 77, 71, 0, 68, 84, + 1, 65, 2, 7, 66, 64, 2, 78, 13, 11, 28, 19, 25, 18, 17, 19, + 46, 12, 13, 44, 30, 1, 108, 100, 101, 91, 94, 88, 84, 86, 83, 87, + 94, 70, 72, 74, 4, 102, 100, 95, 75, 72, 75, 71, 17, 69, 1, 65, + 26, 72, 6, 9, 1, 72, 62, 54, 38, 45, 54, 44, 26, 45, 34, 30, + 33, 18, 5, 1, 2, 25, 18, 24, 21, 19, 18, 22, 14, 29, 21, 8, + 12, 17, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 46, 62, 60, 41, 62, 62, 62, 62, 60, 58, 62, 47, 41, 15, 26, + 3, 68, 97, 71, 21, 13, 9, 1, 5, 0, 72, 74, 91, 67, 36, 24, + 19, 17, 64, 68, 78, 77, 86, 92, 8, 3, 1, 65, 73, 76, 80, 88, + 110, 97, 84, 79, 73, 74, 86, 96, 97, 117, 78, 30, 15, 10, 1, 71, + 79, 86, 90, 97, 62, 93, 84, 79, 66, 71, 1, 3, 4, 75, 1, 5, + 66, 79, 71, 68, 19, 1, 27, 23, 36, 34, 19, 27, 31, 21, 15, 1, + 17, 64, 104, 97, 96, 88, 85, 85, 85, 88, 66, 77, 76, 76, 5, 76, + 83, 99, 95, 95, 76, 74, 70, 75, 68, 65, 73, 1, 1, 68, 75, 8, + 64, 70, 57, 44, 47, 49, 50, 52, 48, 47, 40, 40, 43, 37, 19, 23, + 16, 46, 42, 41, 36, 34, 28, 13, 6, 0, 77, 82, 94, 69, 109, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 50, 28, 5, 62, 62, + 33, 62, 62, 62, 60, 62, 58, 52, 58, 51, 52, 34, 37, 24, 66, 42, + 32, 13, 120, 112, 114, 85, 92, 89, 71, 81, 80, 68, 70, 7, 68, 13, + 74, 62, 62, 62, 62, 60, 57, 29, 9, 82, 75, 40, 29, 20, 9, 8, + 2, 64, 68, 92, 106, 97, 90, 90, 88, 73, 79, 86, 73, 70, 69, 66, + 64, 5, 4, 62, 62, 62, 62, 60, 54, 43, 27, 67, 126, 126, 99, 62, + 62, 62, 62}, + + { + + 62, 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 29, 60, 62, 54, 14, + 115, 6, 77, 64, 1, 14, 72, 12, 65, 20, 62, 68, 91, 104, 124, 102, + 67, 77, 64, 1, 85, 93, 3, 18, 68, 80, 95, 8, 67, 85, 88, 5, + 75, 93, 9, 69, 80, 88, 66, 73, 73, 79, 71, 5, 22, 0, 0, 0, + 82, 86, 97, 71, 22, 1, 52, 8, 69, 125, 101, 82, 73, 105, 125, 125, + 125, 93, 125, 
112, 125, 121, 114, 121, 114, 1, 67, 83, 103, 69, 92, 89, + 125, 73, 96, 90, 125, 8, 81, 75, 123, 92, 86, 76, 70, 1, 67, 83, + 2, 64, 2, 7, 65, 64, 2, 77, 13, 11, 28, 19, 25, 18, 17, 19, + 45, 12, 13, 43, 29, 1, 107, 99, 100, 90, 93, 87, 83, 85, 82, 86, + 92, 70, 72, 73, 3, 101, 99, 95, 74, 72, 74, 70, 17, 68, 1, 65, + 25, 71, 6, 8, 1, 72, 62, 54, 38, 45, 54, 44, 26, 45, 34, 29, + 33, 18, 5, 1, 2, 25, 18, 24, 21, 19, 17, 22, 14, 28, 20, 8, + 11, 16, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 60, 44, 62, 59, 40, 62, 62, 62, 62, 58, 56, 61, 45, 39, 15, 25, + 2, 68, 97, 70, 22, 14, 10, 2, 5, 0, 71, 73, 90, 66, 37, 25, + 20, 17, 0, 67, 77, 76, 85, 91, 9, 4, 2, 64, 72, 75, 79, 87, + 108, 96, 82, 78, 72, 73, 85, 95, 96, 115, 77, 31, 16, 11, 2, 70, + 78, 85, 89, 96, 62, 92, 83, 78, 66, 70, 1, 4, 5, 74, 2, 6, + 65, 78, 71, 68, 19, 2, 27, 23, 35, 34, 19, 26, 30, 21, 15, 1, + 16, 64, 103, 96, 95, 87, 84, 84, 84, 87, 66, 76, 75, 75, 5, 75, + 82, 98, 94, 95, 76, 73, 70, 74, 68, 65, 72, 1, 1, 67, 74, 8, + 64, 70, 57, 44, 47, 49, 49, 52, 48, 47, 40, 40, 43, 37, 19, 22, + 15, 45, 41, 40, 35, 33, 27, 13, 6, 0, 76, 81, 93, 69, 108, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 59, 48, 27, 5, 62, 62, + 32, 62, 62, 62, 58, 62, 56, 50, 56, 49, 50, 33, 35, 23, 67, 41, + 31, 12, 118, 110, 112, 84, 91, 88, 69, 80, 79, 68, 69, 9, 66, 15, + 73, 62, 62, 62, 62, 58, 55, 27, 7, 83, 74, 41, 29, 20, 9, 9, + 2, 64, 68, 91, 105, 96, 89, 89, 86, 72, 78, 85, 72, 69, 68, 65, + 0, 6, 4, 62, 62, 62, 62, 59, 53, 41, 26, 67, 126, 126, 98, 62, + 62, 62, 62}, + + { + + 62, 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 28, 59, 61, 54, 14, + 113, 6, 76, 0, 1, 13, 72, 11, 66, 19, 60, 70, 92, 105, 121, 101, + 67, 76, 0, 1, 85, 92, 3, 17, 68, 80, 94, 8, 67, 85, 88, 5, + 75, 92, 9, 69, 80, 88, 66, 73, 73, 79, 71, 5, 22, 0, 0, 0, + 81, 86, 97, 71, 21, 1, 52, 8, 69, 124, 100, 82, 73, 104, 123, 123, + 124, 92, 123, 111, 123, 120, 113, 120, 113, 2, 67, 82, 102, 69, 92, 88, + 123, 73, 96, 90, 124, 8, 81, 
75, 122, 92, 85, 76, 70, 1, 67, 82, + 2, 64, 1, 7, 65, 64, 2, 77, 13, 11, 27, 19, 24, 18, 17, 19, + 43, 12, 13, 41, 28, 0, 106, 98, 99, 89, 92, 86, 82, 84, 82, 85, + 91, 70, 72, 73, 2, 101, 98, 95, 74, 72, 73, 70, 16, 67, 1, 65, + 24, 70, 5, 7, 1, 73, 60, 53, 37, 44, 53, 43, 25, 44, 34, 28, + 32, 18, 5, 1, 2, 24, 17, 23, 20, 18, 16, 21, 13, 26, 19, 7, + 10, 15, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 58, 41, 62, 57, 38, 62, 62, 62, 62, 56, 54, 58, 43, 37, 14, 23, + 1, 69, 97, 70, 22, 14, 10, 2, 5, 0, 71, 73, 89, 66, 37, 25, + 20, 17, 1, 67, 76, 76, 84, 90, 10, 5, 2, 64, 71, 75, 79, 86, + 107, 95, 81, 77, 72, 73, 84, 94, 95, 114, 77, 31, 16, 11, 2, 69, + 77, 84, 88, 95, 62, 92, 83, 78, 66, 70, 1, 4, 5, 74, 2, 6, + 64, 78, 71, 68, 18, 2, 26, 22, 34, 33, 19, 25, 29, 21, 15, 0, + 15, 65, 102, 95, 94, 87, 84, 84, 83, 86, 66, 76, 75, 75, 4, 75, + 82, 98, 93, 95, 76, 73, 70, 73, 68, 65, 71, 1, 1, 67, 73, 7, + 64, 71, 56, 44, 47, 48, 48, 51, 47, 46, 39, 39, 42, 36, 18, 21, + 14, 43, 40, 38, 33, 32, 26, 12, 5, 0, 76, 81, 93, 70, 107, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 59, 57, 46, 26, 4, 62, 60, + 31, 62, 62, 62, 56, 60, 54, 48, 54, 47, 48, 31, 33, 21, 68, 39, + 29, 10, 117, 109, 111, 83, 90, 87, 67, 79, 78, 68, 68, 10, 65, 16, + 72, 62, 62, 62, 62, 55, 52, 24, 5, 84, 74, 41, 29, 20, 9, 9, + 2, 64, 68, 90, 104, 95, 88, 88, 85, 71, 77, 84, 71, 68, 67, 65, + 1, 6, 4, 62, 62, 62, 61, 57, 51, 39, 24, 68, 126, 126, 97, 62, + 62, 62, 62}, + + { + + 62, 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 26, 57, 60, 54, 14, + 111, 6, 75, 1, 1, 12, 72, 10, 67, 19, 58, 71, 93, 105, 118, 100, + 67, 75, 1, 1, 84, 91, 4, 17, 68, 79, 93, 7, 68, 85, 88, 5, + 75, 92, 9, 69, 80, 88, 65, 73, 73, 79, 70, 5, 22, 0, 0, 0, + 81, 86, 97, 70, 20, 1, 52, 8, 69, 123, 99, 82, 72, 103, 121, 121, + 122, 91, 121, 110, 121, 119, 112, 119, 112, 3, 67, 81, 101, 69, 91, 88, + 121, 73, 95, 89, 123, 8, 81, 74, 120, 91, 84, 76, 70, 1, 67, 81, + 3, 0, 1, 7, 65, 64, 2, 77, 13, 10, 27, 19, 23, 18, 17, 
19, + 41, 12, 12, 39, 27, 64, 105, 97, 98, 88, 91, 86, 81, 84, 81, 84, + 90, 70, 72, 73, 1, 100, 97, 95, 74, 72, 72, 70, 15, 66, 1, 65, + 23, 69, 5, 6, 1, 74, 59, 52, 37, 43, 52, 42, 25, 43, 33, 27, + 31, 18, 5, 1, 1, 23, 16, 22, 19, 17, 15, 20, 13, 24, 18, 7, + 9, 14, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 55, 39, 62, 55, 37, 62, 61, 62, 59, 54, 51, 56, 41, 34, 13, 21, + 0, 70, 97, 70, 23, 14, 10, 2, 5, 0, 71, 73, 89, 66, 37, 25, + 20, 17, 2, 66, 76, 75, 84, 89, 11, 5, 3, 64, 70, 74, 78, 86, + 106, 94, 80, 76, 71, 73, 83, 93, 94, 113, 76, 31, 16, 11, 2, 68, + 77, 83, 87, 94, 62, 91, 82, 77, 66, 70, 1, 4, 5, 74, 2, 6, + 64, 78, 71, 68, 18, 3, 25, 21, 33, 32, 19, 24, 28, 21, 15, 0, + 14, 65, 101, 94, 93, 86, 83, 83, 83, 85, 66, 76, 75, 74, 4, 75, + 82, 97, 92, 95, 76, 73, 70, 72, 68, 65, 70, 1, 1, 67, 72, 6, + 64, 72, 55, 43, 46, 47, 47, 50, 46, 45, 38, 38, 41, 35, 17, 20, + 13, 42, 39, 37, 31, 30, 25, 11, 5, 64, 76, 81, 93, 70, 106, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 54, 44, 24, 3, 61, 59, + 29, 62, 62, 60, 54, 58, 52, 46, 52, 45, 45, 29, 31, 19, 69, 37, + 27, 9, 116, 108, 110, 82, 89, 86, 66, 78, 77, 68, 67, 12, 0, 18, + 71, 62, 62, 62, 62, 52, 49, 21, 3, 85, 74, 41, 29, 20, 9, 9, + 2, 64, 68, 90, 103, 94, 87, 87, 84, 71, 77, 83, 71, 68, 67, 65, + 1, 6, 4, 62, 62, 62, 59, 55, 49, 37, 22, 69, 126, 126, 96, 62, + 62, 62, 62}, + + { + + 62, 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 25, 56, 58, 54, 14, + 108, 5, 74, 1, 1, 11, 72, 9, 68, 18, 56, 73, 94, 106, 115, 99, + 67, 74, 1, 1, 84, 90, 4, 16, 68, 79, 93, 7, 68, 84, 88, 5, + 75, 91, 8, 70, 80, 88, 65, 72, 73, 78, 70, 5, 22, 0, 0, 0, + 80, 87, 97, 70, 19, 1, 52, 8, 69, 122, 98, 82, 72, 101, 120, 119, + 121, 90, 120, 108, 119, 118, 112, 118, 112, 3, 67, 80, 100, 69, 91, 87, + 119, 73, 95, 89, 122, 8, 80, 74, 119, 91, 84, 76, 69, 1, 67, 81, + 3, 0, 0, 6, 65, 64, 2, 77, 13, 10, 26, 19, 23, 18, 17, 18, + 39, 12, 12, 37, 26, 65, 104, 96, 97, 87, 91, 85, 80, 83, 81, 83, + 89, 70, 72, 72, 0, 100, 
96, 95, 74, 72, 72, 70, 14, 65, 1, 65, + 21, 68, 4, 5, 1, 75, 57, 51, 36, 42, 51, 41, 24, 42, 33, 25, + 30, 17, 5, 1, 1, 22, 16, 21, 19, 16, 14, 19, 12, 22, 17, 6, + 8, 13, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 59, + 53, 36, 62, 54, 35, 62, 59, 62, 57, 51, 49, 53, 39, 32, 12, 20, + 65, 71, 97, 70, 23, 15, 10, 2, 5, 0, 71, 73, 88, 65, 38, 25, + 20, 17, 3, 66, 75, 75, 83, 89, 12, 6, 3, 64, 70, 74, 78, 85, + 105, 94, 79, 76, 71, 73, 82, 92, 94, 112, 76, 32, 16, 11, 2, 67, + 76, 83, 86, 93, 62, 91, 82, 77, 66, 70, 1, 4, 5, 73, 2, 6, + 0, 78, 71, 68, 17, 3, 24, 20, 32, 31, 19, 22, 27, 20, 15, 64, + 13, 66, 101, 94, 92, 86, 83, 83, 82, 84, 67, 76, 75, 74, 3, 75, + 82, 97, 91, 95, 76, 72, 70, 72, 68, 65, 69, 1, 0, 67, 71, 6, + 65, 73, 54, 43, 46, 46, 46, 49, 45, 44, 37, 37, 40, 34, 16, 19, + 12, 40, 37, 35, 29, 29, 24, 10, 4, 64, 76, 81, 93, 71, 106, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 60, 55, 52, 42, 23, 2, 59, 57, + 28, 62, 62, 58, 52, 55, 50, 44, 50, 43, 43, 27, 29, 17, 70, 35, + 25, 7, 115, 107, 109, 82, 88, 85, 64, 77, 76, 68, 66, 13, 1, 19, + 71, 62, 62, 62, 62, 49, 46, 18, 1, 86, 74, 41, 29, 20, 9, 9, + 2, 64, 68, 89, 102, 93, 86, 87, 83, 70, 76, 82, 70, 67, 66, 64, + 2, 7, 4, 62, 62, 62, 57, 53, 47, 35, 20, 70, 126, 126, 96, 62, + 62, 62, 62}, + + { + + 62, 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 23, 54, 57, 54, 14, + 106, 5, 73, 2, 1, 11, 71, 8, 69, 18, 54, 75, 95, 106, 112, 97, + 67, 73, 2, 1, 84, 89, 4, 16, 68, 79, 92, 7, 69, 84, 88, 5, + 75, 90, 8, 70, 80, 88, 64, 72, 72, 78, 69, 5, 22, 0, 0, 0, + 80, 87, 97, 69, 18, 1, 52, 8, 69, 121, 97, 82, 71, 100, 118, 117, + 119, 89, 118, 107, 117, 117, 111, 117, 111, 4, 67, 79, 99, 69, 90, 86, + 117, 73, 95, 88, 120, 9, 80, 73, 118, 90, 83, 76, 69, 2, 66, 80, + 4, 1, 0, 6, 65, 64, 2, 77, 13, 9, 25, 19, 22, 18, 17, 18, + 37, 12, 11, 36, 25, 66, 103, 95, 96, 86, 90, 84, 79, 82, 80, 82, + 88, 70, 72, 72, 64, 99, 95, 95, 73, 72, 71, 70, 13, 64, 1, 65, + 20, 67, 4, 4, 1, 75, 56, 50, 36, 41, 50, 40, 23, 42, 33, 
24, + 29, 17, 5, 1, 0, 22, 15, 20, 18, 15, 13, 19, 11, 20, 16, 5, + 7, 12, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, + 51, 34, 60, 52, 33, 62, 57, 60, 55, 49, 47, 50, 37, 29, 11, 18, + 66, 71, 97, 70, 23, 15, 10, 2, 5, 0, 71, 73, 88, 65, 38, 25, + 20, 17, 4, 65, 74, 75, 82, 88, 13, 7, 3, 0, 69, 73, 77, 85, + 104, 93, 77, 75, 71, 72, 81, 91, 93, 111, 75, 32, 17, 11, 2, 66, + 75, 82, 85, 92, 62, 91, 82, 76, 66, 70, 1, 4, 5, 73, 2, 7, + 0, 78, 71, 68, 16, 4, 23, 19, 31, 31, 19, 21, 26, 20, 15, 65, + 12, 66, 100, 93, 91, 85, 82, 82, 82, 83, 67, 76, 75, 74, 2, 75, + 82, 96, 90, 95, 76, 72, 70, 71, 68, 65, 68, 1, 0, 67, 70, 5, + 65, 73, 53, 43, 45, 46, 45, 48, 44, 43, 37, 36, 39, 33, 15, 18, + 11, 39, 36, 34, 27, 28, 23, 9, 3, 65, 76, 80, 93, 71, 105, 62, + 62, 62, 62, 62, 62, 62, 62, 60, 58, 53, 50, 40, 21, 1, 57, 55, + 27, 61, 62, 56, 50, 53, 48, 42, 48, 41, 40, 25, 27, 15, 71, 33, + 23, 6, 114, 105, 108, 81, 87, 84, 1, 76, 75, 68, 65, 15, 3, 21, + 70, 62, 62, 62, 62, 47, 43, 16, 64, 87, 74, 41, 29, 20, 9, 9, + 2, 64, 68, 89, 101, 92, 85, 86, 82, 69, 76, 81, 69, 66, 65, 64, + 2, 7, 4, 62, 62, 62, 56, 51, 45, 33, 18, 71, 126, 126, 95, 62, + 62, 62, 62}, + + { + + 62, 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 22, 53, 56, 54, 14, + 104, 5, 73, 3, 1, 10, 71, 7, 70, 17, 53, 76, 96, 107, 109, 96, + 67, 73, 3, 1, 83, 88, 5, 15, 67, 78, 91, 6, 69, 84, 88, 5, + 74, 90, 8, 70, 79, 88, 64, 72, 72, 78, 69, 5, 22, 0, 0, 0, + 79, 87, 97, 69, 18, 0, 52, 8, 69, 120, 97, 82, 71, 99, 116, 115, + 118, 88, 116, 106, 115, 116, 110, 116, 110, 5, 67, 78, 99, 68, 90, 86, + 115, 73, 94, 88, 119, 9, 80, 73, 116, 90, 82, 75, 69, 2, 66, 79, + 4, 1, 64, 6, 65, 64, 2, 77, 13, 9, 25, 19, 21, 18, 17, 18, + 35, 12, 11, 34, 24, 67, 103, 94, 96, 86, 89, 84, 78, 82, 80, 82, + 86, 70, 72, 72, 65, 99, 94, 95, 73, 72, 70, 69, 12, 64, 1, 65, + 19, 66, 3, 3, 1, 76, 54, 49, 35, 41, 49, 40, 23, 41, 32, 23, + 28, 17, 5, 1, 0, 21, 14, 19, 17, 15, 12, 18, 11, 18, 15, 5, + 6, 11, 89, 62, 62, 62, 62, 62, 
62, 62, 62, 62, 62, 62, 62, 54, + 48, 31, 58, 50, 32, 62, 54, 57, 52, 47, 44, 48, 34, 27, 10, 16, + 67, 72, 97, 69, 24, 15, 11, 2, 5, 0, 71, 73, 87, 65, 38, 26, + 20, 17, 5, 65, 74, 74, 82, 87, 14, 7, 4, 0, 68, 73, 77, 84, + 103, 92, 76, 74, 70, 72, 81, 91, 92, 109, 75, 32, 17, 11, 3, 66, + 75, 81, 85, 91, 62, 90, 81, 76, 66, 70, 1, 4, 5, 73, 3, 7, + 1, 78, 71, 69, 16, 4, 22, 18, 30, 30, 19, 20, 25, 20, 15, 65, + 11, 67, 99, 92, 90, 85, 82, 82, 81, 83, 67, 75, 74, 73, 2, 75, + 82, 96, 89, 95, 76, 72, 70, 70, 68, 65, 67, 0, 0, 67, 70, 4, + 65, 74, 52, 42, 45, 45, 44, 48, 44, 42, 36, 36, 38, 32, 14, 17, + 10, 37, 35, 32, 25, 26, 21, 8, 3, 65, 76, 80, 92, 72, 104, 62, + 62, 62, 62, 62, 62, 62, 62, 58, 55, 51, 47, 38, 20, 1, 56, 54, + 25, 59, 62, 54, 48, 51, 46, 40, 45, 39, 38, 23, 25, 14, 73, 31, + 21, 4, 113, 104, 107, 80, 86, 83, 2, 75, 74, 68, 64, 16, 4, 22, + 69, 62, 62, 62, 59, 44, 41, 13, 66, 89, 73, 41, 29, 20, 9, 9, + 2, 64, 68, 88, 100, 92, 84, 85, 81, 69, 75, 80, 69, 66, 65, 64, + 3, 7, 4, 62, 62, 61, 54, 50, 44, 30, 17, 72, 126, 126, 94, 62, + 62, 62, 62}, + + { + + 62, 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 20, 51, 54, 54, 14, + 101, 4, 72, 3, 1, 9, 71, 6, 71, 17, 51, 78, 97, 107, 106, 95, + 67, 72, 3, 1, 83, 87, 5, 15, 67, 78, 91, 6, 70, 83, 88, 5, + 74, 89, 7, 70, 79, 88, 0, 71, 72, 77, 68, 5, 22, 0, 0, 0, + 79, 87, 97, 68, 17, 0, 52, 8, 69, 119, 96, 82, 70, 97, 115, 113, + 116, 87, 115, 104, 113, 115, 109, 115, 110, 6, 67, 77, 98, 68, 89, 85, + 113, 73, 94, 87, 118, 9, 79, 72, 115, 89, 82, 75, 68, 2, 66, 78, + 5, 2, 64, 5, 65, 64, 2, 77, 13, 8, 24, 19, 21, 18, 17, 17, + 33, 12, 10, 32, 23, 68, 102, 93, 95, 85, 88, 83, 77, 81, 79, 81, + 85, 70, 72, 71, 66, 98, 93, 95, 73, 72, 70, 69, 11, 0, 1, 65, + 17, 65, 3, 2, 1, 77, 53, 48, 35, 40, 48, 39, 22, 40, 32, 22, + 27, 17, 5, 1, 64, 20, 14, 18, 17, 14, 11, 17, 10, 16, 14, 4, + 5, 10, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 60, 61, 52, + 46, 29, 56, 49, 30, 62, 52, 55, 50, 44, 42, 45, 32, 24, 9, 15, + 69, 
73, 97, 69, 24, 16, 11, 2, 5, 0, 71, 73, 87, 64, 39, 26, + 20, 17, 6, 64, 73, 74, 81, 86, 15, 8, 4, 0, 67, 72, 76, 84, + 102, 92, 75, 74, 70, 72, 80, 90, 92, 108, 74, 33, 17, 11, 3, 65, + 74, 80, 84, 90, 62, 90, 81, 75, 66, 70, 1, 4, 5, 72, 3, 7, + 1, 78, 71, 69, 15, 5, 21, 17, 29, 29, 19, 19, 24, 19, 15, 66, + 10, 67, 98, 92, 89, 84, 81, 81, 81, 82, 67, 75, 74, 73, 1, 75, + 82, 95, 88, 95, 76, 71, 70, 70, 68, 65, 66, 0, 0, 67, 69, 4, + 66, 75, 51, 42, 44, 44, 43, 47, 43, 41, 35, 35, 37, 31, 13, 16, + 9, 36, 33, 31, 23, 25, 20, 7, 2, 66, 76, 80, 92, 72, 103, 62, + 62, 62, 62, 62, 62, 62, 61, 56, 53, 49, 45, 36, 18, 0, 54, 52, + 24, 57, 62, 52, 46, 49, 44, 38, 43, 37, 35, 21, 23, 12, 74, 29, + 19, 3, 112, 103, 106, 80, 85, 82, 4, 74, 73, 68, 0, 18, 6, 24, + 69, 62, 62, 61, 56, 41, 38, 10, 68, 90, 73, 41, 29, 20, 9, 9, + 2, 64, 68, 88, 99, 91, 83, 84, 80, 68, 75, 79, 68, 65, 64, 0, + 3, 8, 4, 62, 62, 59, 52, 48, 42, 28, 15, 73, 126, 126, 94, 62, + 62, 62, 62}, + + { + + 62, 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 19, 50, 53, 54, 14, + 99, 4, 71, 4, 1, 8, 71, 5, 73, 16, 49, 80, 98, 108, 104, 94, + 67, 71, 4, 1, 83, 86, 5, 14, 67, 78, 90, 5, 70, 83, 89, 5, + 74, 89, 7, 71, 79, 88, 0, 71, 72, 77, 68, 5, 22, 0, 0, 0, + 78, 88, 97, 68, 16, 0, 52, 8, 69, 118, 95, 82, 70, 96, 113, 111, + 115, 86, 113, 103, 112, 114, 109, 114, 109, 6, 67, 76, 97, 68, 89, 85, + 112, 73, 94, 87, 117, 9, 79, 72, 114, 89, 81, 75, 68, 2, 66, 78, + 5, 2, 65, 5, 65, 64, 2, 77, 13, 8, 23, 19, 20, 18, 17, 17, + 31, 12, 10, 30, 22, 69, 101, 92, 94, 84, 88, 83, 76, 81, 79, 80, + 84, 70, 72, 71, 68, 98, 92, 95, 73, 73, 69, 69, 10, 1, 1, 65, + 16, 64, 2, 1, 1, 78, 51, 47, 34, 39, 47, 38, 21, 39, 31, 20, + 26, 16, 5, 1, 64, 19, 13, 17, 16, 13, 10, 16, 9, 14, 12, 3, + 4, 9, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 58, 58, 49, + 43, 26, 54, 47, 28, 61, 50, 52, 47, 42, 39, 42, 30, 22, 8, 13, + 70, 74, 98, 69, 24, 16, 11, 2, 5, 0, 71, 73, 86, 64, 39, 26, + 20, 17, 7, 64, 73, 74, 81, 86, 16, 8, 4, 0, 67, 72, 
76, 83, + 101, 91, 74, 73, 70, 72, 79, 89, 91, 107, 74, 33, 17, 11, 3, 64, + 74, 80, 83, 90, 62, 90, 81, 75, 66, 70, 1, 4, 5, 72, 3, 7, + 2, 78, 71, 69, 14, 5, 20, 16, 28, 28, 19, 17, 22, 19, 15, 67, + 9, 68, 98, 91, 88, 84, 81, 81, 80, 81, 68, 75, 74, 73, 0, 75, + 82, 95, 88, 96, 76, 71, 70, 69, 68, 65, 66, 0, 64, 67, 68, 3, + 66, 76, 50, 41, 44, 43, 41, 46, 42, 40, 34, 34, 36, 30, 12, 15, + 8, 34, 32, 29, 21, 23, 19, 6, 1, 66, 76, 80, 92, 73, 103, 62, + 62, 62, 62, 62, 62, 61, 58, 54, 51, 47, 42, 34, 17, 64, 52, 50, + 22, 55, 61, 49, 43, 46, 41, 36, 41, 34, 33, 19, 20, 10, 75, 27, + 17, 1, 111, 102, 105, 79, 84, 82, 5, 73, 73, 68, 0, 19, 7, 25, + 68, 62, 62, 58, 53, 38, 35, 7, 70, 91, 73, 41, 29, 20, 9, 9, + 2, 64, 68, 87, 99, 90, 82, 84, 79, 68, 74, 79, 68, 65, 64, 0, + 4, 8, 3, 62, 62, 57, 50, 46, 40, 26, 13, 74, 126, 126, 93, 62, + 62, 62, 62}, + + { + + 62, 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 18, 49, 52, 54, 14, + 97, 4, 70, 5, 1, 8, 70, 4, 74, 15, 47, 81, 99, 109, 101, 92, + 67, 70, 5, 1, 82, 85, 6, 13, 67, 77, 89, 5, 70, 83, 89, 5, + 74, 88, 7, 71, 79, 88, 0, 71, 71, 77, 68, 5, 22, 0, 0, 0, + 77, 88, 97, 68, 15, 0, 52, 8, 69, 117, 94, 82, 70, 95, 111, 109, + 113, 84, 111, 102, 110, 113, 108, 113, 108, 7, 66, 75, 96, 68, 88, 84, + 110, 73, 93, 87, 115, 10, 79, 72, 112, 89, 80, 75, 68, 3, 65, 77, + 5, 2, 65, 5, 64, 64, 2, 76, 13, 8, 23, 19, 19, 18, 17, 17, + 29, 12, 10, 29, 21, 69, 100, 91, 93, 83, 87, 82, 75, 80, 79, 79, + 83, 70, 72, 71, 69, 97, 91, 95, 72, 73, 68, 69, 9, 2, 1, 65, + 15, 0, 1, 0, 1, 78, 50, 46, 34, 38, 46, 37, 21, 39, 31, 19, + 25, 16, 5, 1, 64, 19, 12, 16, 15, 12, 9, 16, 9, 13, 11, 3, + 3, 8, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 59, 56, 56, 46, + 41, 23, 53, 45, 27, 59, 48, 50, 45, 40, 37, 40, 28, 20, 8, 11, + 71, 74, 98, 69, 25, 16, 11, 3, 5, 0, 70, 73, 85, 64, 39, 26, + 21, 17, 8, 0, 72, 73, 80, 85, 17, 9, 5, 1, 66, 71, 76, 82, + 100, 90, 72, 72, 69, 71, 78, 88, 90, 106, 73, 33, 18, 12, 3, 0, + 73, 79, 82, 89, 62, 89, 80, 74, 66, 
70, 1, 5, 6, 72, 3, 8, + 3, 78, 71, 69, 14, 5, 19, 16, 27, 28, 19, 16, 21, 19, 15, 67, + 8, 69, 97, 90, 87, 84, 80, 81, 79, 80, 68, 75, 74, 72, 0, 75, + 82, 95, 87, 96, 76, 71, 70, 68, 68, 65, 65, 0, 64, 67, 67, 2, + 66, 76, 49, 41, 44, 43, 40, 45, 41, 39, 34, 33, 35, 30, 12, 14, + 7, 33, 31, 27, 19, 22, 18, 6, 1, 66, 75, 79, 92, 74, 102, 62, + 62, 62, 62, 62, 62, 59, 56, 52, 49, 45, 40, 32, 16, 65, 50, 49, + 21, 53, 59, 47, 41, 44, 39, 34, 39, 32, 31, 18, 18, 8, 76, 25, + 15, 64, 110, 100, 103, 78, 83, 81, 7, 72, 72, 68, 1, 21, 8, 27, + 67, 62, 62, 56, 50, 36, 32, 5, 72, 92, 73, 41, 29, 20, 9, 10, + 2, 64, 68, 86, 98, 89, 81, 83, 77, 67, 73, 78, 67, 64, 0, 0, + 5, 8, 3, 62, 61, 56, 49, 44, 38, 24, 11, 74, 126, 126, 92, 62, + 62, 62, 62}, + + { + + 62, 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 16, 47, 50, 54, 14, + 94, 3, 69, 5, 1, 7, 70, 3, 75, 15, 45, 83, 100, 109, 98, 91, + 67, 69, 5, 1, 82, 84, 6, 13, 67, 77, 89, 5, 71, 82, 89, 5, + 74, 87, 6, 71, 79, 88, 1, 70, 71, 76, 67, 5, 22, 0, 0, 0, + 77, 88, 97, 67, 14, 0, 52, 8, 69, 116, 93, 82, 69, 93, 110, 107, + 112, 83, 110, 100, 108, 112, 107, 112, 108, 8, 66, 74, 95, 68, 88, 83, + 108, 73, 93, 86, 114, 10, 78, 71, 111, 88, 80, 75, 67, 3, 65, 76, + 6, 3, 66, 4, 64, 64, 2, 76, 13, 7, 22, 19, 19, 18, 17, 16, + 27, 12, 9, 27, 20, 70, 99, 90, 92, 82, 86, 81, 74, 79, 78, 78, + 82, 70, 72, 70, 70, 97, 90, 95, 72, 73, 68, 69, 8, 3, 1, 65, + 13, 1, 1, 64, 1, 79, 48, 45, 33, 37, 45, 36, 20, 38, 31, 18, + 24, 16, 5, 1, 65, 18, 12, 15, 15, 11, 8, 15, 8, 11, 10, 2, + 2, 7, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 54, 53, 44, + 39, 21, 51, 44, 25, 56, 46, 48, 43, 37, 35, 37, 26, 17, 7, 10, + 73, 75, 98, 69, 25, 17, 11, 3, 5, 0, 70, 73, 85, 0, 40, 26, + 21, 17, 9, 0, 71, 73, 79, 84, 18, 10, 5, 1, 65, 71, 75, 82, + 99, 90, 71, 72, 69, 71, 77, 87, 90, 105, 73, 34, 18, 12, 3, 1, + 72, 78, 81, 88, 62, 89, 80, 74, 66, 70, 1, 5, 6, 71, 3, 8, + 3, 78, 71, 69, 13, 6, 18, 15, 26, 27, 19, 15, 20, 18, 15, 68, + 7, 69, 96, 90, 86, 83, 80, 
80, 79, 79, 68, 75, 74, 72, 64, 75, + 82, 94, 86, 96, 76, 70, 70, 68, 68, 65, 64, 0, 64, 67, 66, 2, + 67, 77, 48, 41, 43, 42, 39, 44, 40, 38, 33, 32, 34, 29, 11, 13, + 6, 31, 29, 26, 17, 21, 17, 5, 0, 67, 75, 79, 92, 74, 101, 62, + 62, 62, 62, 62, 60, 57, 53, 50, 47, 43, 38, 30, 14, 66, 48, 47, + 20, 51, 57, 45, 39, 42, 37, 32, 37, 30, 28, 16, 16, 6, 77, 23, + 13, 65, 109, 99, 102, 78, 82, 80, 9, 71, 71, 68, 2, 22, 10, 28, + 67, 62, 60, 53, 47, 33, 29, 2, 74, 93, 73, 41, 29, 20, 9, 10, + 2, 64, 68, 86, 97, 88, 80, 82, 76, 66, 73, 77, 66, 0, 1, 1, + 5, 9, 3, 60, 59, 54, 47, 42, 36, 22, 9, 75, 126, 126, 92, 62, + 62, 62, 62}, + + { + + 62, 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 15, 46, 49, 54, 14, + 92, 3, 69, 6, 1, 6, 70, 2, 76, 14, 44, 84, 101, 110, 95, 90, + 67, 69, 6, 1, 81, 83, 7, 12, 66, 76, 88, 4, 71, 82, 89, 5, + 73, 87, 6, 71, 78, 88, 1, 70, 71, 76, 67, 5, 22, 0, 0, 0, + 76, 88, 97, 67, 14, 64, 52, 8, 69, 115, 93, 82, 69, 92, 108, 105, + 110, 82, 108, 99, 106, 111, 106, 111, 107, 9, 66, 73, 95, 67, 87, 83, + 106, 73, 92, 86, 113, 10, 78, 71, 109, 88, 79, 74, 67, 3, 65, 75, + 6, 3, 66, 4, 64, 64, 2, 76, 13, 7, 22, 19, 18, 18, 17, 16, + 25, 12, 9, 25, 19, 71, 99, 89, 92, 82, 85, 81, 73, 79, 78, 78, + 80, 70, 72, 70, 71, 96, 89, 95, 72, 73, 67, 68, 7, 3, 1, 65, + 12, 2, 0, 65, 1, 80, 47, 44, 33, 37, 44, 36, 20, 37, 30, 17, + 23, 16, 5, 1, 65, 17, 11, 14, 14, 11, 7, 14, 8, 9, 9, 2, + 1, 6, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, 54, 52, 51, 41, + 36, 18, 49, 42, 24, 54, 43, 45, 40, 35, 32, 35, 23, 15, 6, 8, + 74, 76, 98, 68, 26, 17, 12, 3, 5, 0, 70, 73, 84, 0, 40, 27, + 21, 17, 10, 1, 71, 72, 79, 83, 19, 10, 6, 1, 64, 70, 75, 81, + 98, 89, 70, 71, 68, 71, 77, 87, 89, 103, 72, 34, 18, 12, 4, 1, + 72, 77, 81, 87, 62, 88, 79, 73, 66, 70, 1, 5, 6, 71, 4, 8, + 4, 78, 71, 70, 13, 6, 17, 14, 25, 26, 19, 14, 19, 18, 15, 68, + 6, 70, 95, 89, 85, 83, 79, 80, 78, 79, 68, 74, 73, 71, 64, 75, + 82, 94, 85, 96, 76, 70, 70, 67, 68, 65, 0, 64, 64, 67, 66, 1, + 67, 78, 47, 40, 
43, 41, 38, 44, 40, 37, 32, 32, 33, 28, 10, 12, + 5, 30, 28, 24, 15, 19, 15, 4, 0, 67, 75, 79, 91, 75, 100, 62, + 62, 62, 62, 62, 58, 55, 51, 48, 44, 41, 35, 28, 13, 66, 47, 46, + 18, 49, 54, 43, 37, 40, 35, 30, 34, 28, 26, 14, 14, 5, 79, 21, + 11, 67, 108, 98, 101, 77, 81, 79, 10, 70, 70, 68, 3, 24, 11, 30, + 66, 61, 59, 51, 44, 30, 27, 64, 76, 95, 72, 41, 29, 20, 9, 10, + 2, 64, 68, 85, 96, 88, 79, 81, 75, 66, 72, 76, 66, 0, 1, 1, + 6, 9, 3, 59, 58, 52, 45, 41, 35, 19, 8, 76, 126, 124, 91, 62, + 62, 62, 62}, + + { + + 62, 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 13, 44, 48, 54, 14, + 90, 3, 68, 7, 1, 5, 70, 1, 77, 14, 42, 86, 102, 110, 92, 89, + 67, 68, 7, 1, 81, 82, 7, 12, 66, 76, 87, 4, 72, 82, 89, 5, + 73, 86, 6, 72, 78, 88, 2, 70, 71, 76, 66, 5, 22, 0, 0, 0, + 76, 89, 97, 66, 13, 64, 52, 8, 69, 114, 92, 82, 68, 91, 106, 103, + 109, 81, 106, 98, 104, 110, 106, 110, 106, 9, 66, 72, 94, 67, 87, 82, + 104, 73, 92, 85, 112, 10, 78, 70, 108, 87, 78, 74, 67, 3, 65, 75, + 7, 4, 67, 4, 64, 64, 2, 76, 13, 6, 21, 19, 17, 18, 17, 16, + 23, 12, 8, 23, 18, 72, 98, 88, 91, 81, 85, 80, 72, 78, 77, 77, + 79, 70, 72, 70, 72, 96, 88, 95, 72, 73, 66, 68, 6, 4, 1, 65, + 11, 3, 0, 66, 1, 81, 45, 43, 32, 36, 43, 35, 19, 36, 30, 15, + 22, 15, 5, 1, 66, 16, 10, 13, 13, 10, 6, 13, 7, 7, 8, 1, + 0, 5, 89, 62, 62, 61, 62, 62, 62, 62, 62, 61, 52, 50, 48, 39, + 34, 16, 47, 40, 22, 52, 41, 43, 38, 33, 30, 32, 21, 12, 5, 6, + 75, 77, 98, 68, 26, 17, 12, 3, 5, 0, 70, 73, 84, 0, 40, 27, + 21, 17, 11, 1, 70, 72, 78, 83, 20, 11, 6, 1, 64, 70, 74, 81, + 97, 88, 69, 70, 68, 71, 76, 86, 88, 102, 72, 34, 18, 12, 4, 2, + 71, 77, 80, 86, 62, 88, 79, 73, 66, 70, 1, 5, 6, 71, 4, 8, + 4, 78, 71, 70, 12, 7, 16, 13, 24, 25, 19, 12, 18, 18, 15, 69, + 5, 70, 95, 88, 84, 82, 79, 79, 78, 78, 69, 74, 73, 71, 65, 75, + 82, 93, 84, 96, 76, 70, 70, 66, 68, 65, 1, 64, 65, 67, 65, 0, + 67, 79, 46, 40, 42, 40, 37, 43, 39, 36, 31, 31, 32, 27, 9, 11, + 4, 28, 27, 23, 13, 18, 14, 3, 64, 68, 75, 79, 91, 75, 100, 62, + 62, 
62, 62, 62, 56, 53, 48, 46, 42, 39, 33, 26, 11, 67, 45, 44, + 17, 47, 52, 41, 35, 37, 33, 28, 32, 26, 23, 12, 12, 3, 80, 19, + 9, 68, 107, 97, 100, 76, 80, 78, 12, 69, 69, 68, 4, 25, 13, 31, + 65, 59, 57, 48, 41, 27, 24, 67, 78, 96, 72, 41, 29, 20, 9, 10, + 2, 64, 68, 85, 95, 87, 78, 81, 74, 65, 72, 75, 65, 1, 2, 1, + 6, 9, 3, 58, 56, 50, 43, 39, 33, 17, 6, 77, 126, 123, 90, 62, + 62, 62, 62}, + + { + + 62, 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 12, 43, 46, 54, 14, 87, + 2, 67, 7, 1, 5, 69, 0, 78, 13, 40, 88, 103, 111, 89, 87, 67, 67, + 7, 1, 81, 81, 7, 11, 66, 76, 87, 4, 72, 81, 89, 5, 73, 85, 5, + 72, 78, 88, 2, 69, 70, 75, 66, 5, 22, 0, 0, 0, 75, 89, 97, 66, + 12, 64, 52, 8, 69, 113, 91, 82, 68, 89, 105, 101, 107, 80, 105, 96, 102, + 109, 105, 109, 106, 10, 66, 71, 93, 67, 86, 81, 102, 73, 92, 85, 110, 11, + 77, 70, 107, 87, 78, 74, 66, 4, 64, 74, 7, 4, 67, 3, 64, 64, 2, + 76, 13, 6, 20, 19, 17, 18, 17, 15, 21, 12, 8, 22, 17, 73, 97, 87, + 90, 80, 84, 79, 71, 77, 77, 76, 78, 70, 72, 69, 73, 95, 87, 95, 71, + 73, 66, 68, 5, 5, 1, 65, 9, 4, 64, 67, 1, 81, 44, 42, 32, 35, + 42, 34, 18, 36, 30, 14, 21, 15, 5, 1, 66, 16, 10, 12, 13, 9, 5, + 13, 6, 5, 7, 0, 64, 4, 89, 61, 62, 59, 62, 61, 60, 60, 60, 59, + 50, 48, 46, 36, 32, 13, 45, 39, 20, 49, 39, 41, 36, 30, 28, 29, 19, + 10, 4, 5, 77, 77, 98, 68, 26, 18, 12, 3, 5, 0, 70, 73, 83, 1, + 41, 27, 21, 17, 12, 2, 69, 72, 77, 82, 21, 12, 6, 2, 0, 69, 74, + 80, 96, 88, 67, 70, 68, 70, 75, 85, 88, 101, 71, 35, 19, 12, 4, 3, + 70, 76, 79, 85, 62, 88, 79, 72, 66, 70, 1, 5, 6, 70, 4, 9, 5, + 78, 71, 70, 11, 7, 15, 12, 23, 25, 19, 11, 17, 17, 15, 70, 4, 71, + 94, 88, 83, 82, 78, 79, 77, 77, 69, 74, 73, 71, 66, 75, 82, 93, 83, + 96, 76, 69, 70, 66, 68, 65, 2, 64, 65, 67, 64, 0, 68, 79, 45, 40, + 42, 40, 36, 42, 38, 35, 31, 30, 31, 26, 8, 10, 3, 27, 25, 21, 11, + 17, 13, 2, 65, 68, 75, 78, 91, 76, 99, 62, 62, 62, 62, 60, 54, 51, + 46, 44, 40, 37, 31, 24, 10, 68, 43, 42, 16, 45, 50, 39, 33, 35, 31, + 26, 30, 24, 21, 10, 10, 1, 81, 
17, 7, 70, 106, 95, 99, 76, 79, 77, + 14, 68, 68, 68, 5, 27, 14, 33, 65, 58, 55, 46, 38, 25, 21, 69, 80, + 97, 72, 41, 29, 20, 9, 10, 2, 64, 68, 84, 94, 86, 77, 80, 73, 64, + 71, 74, 64, 2, 3, 2, 7, 10, 3, 56, 55, 49, 42, 37, 31, 15, 4, + 78, 126, 122, 90, 62, 62, 62, 62}, + + { + + 61, 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 10, 41, 45, 54, 14, 85, + 2, 66, 8, 1, 4, 69, 64, 79, 13, 38, 89, 104, 111, 86, 86, 67, 66, + 8, 1, 80, 80, 8, 11, 66, 75, 86, 3, 73, 81, 89, 5, 73, 85, 5, + 72, 78, 88, 3, 69, 70, 75, 65, 5, 22, 0, 0, 0, 75, 89, 97, 65, + 11, 64, 52, 8, 69, 112, 90, 82, 67, 88, 103, 99, 106, 79, 103, 95, 100, + 108, 104, 108, 105, 11, 66, 70, 92, 67, 86, 81, 100, 73, 91, 84, 109, 11, + 77, 69, 105, 86, 77, 74, 66, 4, 64, 73, 8, 5, 68, 3, 64, 64, 2, + 76, 13, 5, 20, 19, 16, 18, 17, 15, 19, 12, 7, 20, 16, 74, 96, 86, + 89, 79, 83, 79, 70, 77, 76, 75, 77, 70, 72, 69, 74, 95, 86, 95, 71, + 73, 65, 68, 4, 6, 1, 65, 8, 5, 64, 68, 1, 82, 42, 41, 31, 34, + 41, 33, 18, 35, 29, 13, 20, 15, 5, 1, 67, 15, 9, 11, 12, 8, 4, + 12, 6, 3, 6, 0, 65, 3, 89, 60, 61, 58, 62, 59, 58, 58, 58, 56, + 47, 46, 43, 34, 29, 11, 43, 37, 19, 47, 37, 38, 33, 28, 25, 27, 17, + 7, 3, 3, 78, 78, 98, 68, 27, 18, 12, 3, 5, 0, 70, 73, 83, 1, + 41, 27, 21, 17, 13, 2, 69, 71, 77, 81, 22, 12, 7, 2, 1, 69, 73, + 80, 95, 87, 66, 69, 67, 70, 74, 84, 87, 100, 71, 35, 19, 12, 4, 4, + 70, 75, 78, 84, 62, 87, 78, 72, 66, 70, 1, 5, 6, 70, 4, 9, 5, + 78, 71, 70, 11, 8, 14, 11, 22, 24, 19, 10, 16, 17, 15, 70, 3, 71, + 93, 87, 82, 81, 78, 78, 77, 76, 69, 74, 73, 70, 66, 75, 82, 92, 82, + 96, 76, 69, 70, 65, 68, 65, 3, 64, 65, 67, 0, 64, 68, 80, 44, 39, + 41, 39, 35, 41, 37, 34, 30, 29, 30, 25, 7, 9, 2, 25, 24, 20, 9, + 15, 12, 1, 65, 69, 75, 78, 91, 76, 98, 62, 62, 61, 61, 57, 52, 49, + 43, 42, 38, 35, 28, 22, 8, 69, 41, 41, 14, 43, 48, 37, 31, 33, 29, + 24, 28, 22, 18, 8, 8, 64, 82, 15, 5, 71, 105, 94, 98, 75, 78, 76, + 15, 67, 67, 68, 6, 28, 16, 34, 64, 56, 54, 43, 35, 22, 18, 72, 82, + 98, 72, 41, 29, 20, 9, 
10, 2, 64, 68, 84, 93, 85, 76, 79, 72, 64, + 71, 73, 64, 2, 3, 2, 7, 10, 3, 55, 53, 47, 40, 35, 29, 13, 2, + 79, 125, 120, 89, 62, 62, 62, 62}, + + { + + 60, 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 9, 40, 44, 54, 14, 83, 2, + 65, 9, 1, 3, 69, 65, 80, 12, 36, 91, 105, 112, 83, 85, 67, 65, 9, 1, + 80, 79, 8, 10, 66, 75, 85, 3, 73, 81, 89, 5, 73, 84, 5, 72, 78, 88, + 3, 69, 70, 75, 65, 5, 22, 0, 0, 0, 74, 89, 97, 65, 10, 64, 52, 8, + 69, 111, 89, 82, 67, 87, 101, 97, 104, 78, 101, 94, 98, 107, 103, 107, 104, 12, + 66, 69, 91, 67, 85, 80, 98, 73, 91, 84, 108, 11, 77, 69, 104, 86, 76, 74, + 66, 4, 64, 72, 8, 5, 68, 3, 64, 64, 2, 76, 13, 5, 19, 19, 15, 18, + 17, 15, 17, 12, 7, 18, 15, 75, 95, 85, 88, 78, 82, 78, 69, 76, 76, 74, + 76, 70, 72, 69, 75, 94, 85, 95, 71, 73, 64, 68, 3, 7, 1, 65, 7, 6, + 65, 69, 1, 83, 41, 40, 31, 33, 40, 32, 17, 34, 29, 12, 19, 15, 5, 1, + 67, 14, 8, 10, 11, 7, 3, 11, 5, 1, 5, 64, 66, 2, 89, 58, 60, 56, + 60, 57, 56, 56, 56, 54, 45, 44, 41, 31, 27, 8, 41, 35, 17, 45, 35, 36, + 31, 26, 23, 24, 15, 5, 2, 1, 79, 79, 98, 68, 27, 18, 12, 3, 5, 0, + 70, 73, 82, 1, 41, 27, 21, 17, 14, 3, 68, 71, 76, 80, 23, 13, 7, 2, + 2, 68, 73, 79, 94, 86, 65, 68, 67, 70, 73, 83, 86, 99, 70, 35, 19, 12, + 4, 5, 69, 74, 77, 83, 62, 87, 78, 71, 66, 70, 1, 5, 6, 70, 4, 9, + 6, 78, 71, 70, 10, 8, 13, 10, 21, 23, 19, 9, 15, 17, 15, 71, 2, 72, + 92, 86, 81, 81, 77, 78, 76, 75, 69, 74, 73, 70, 67, 75, 82, 92, 81, 96, + 76, 69, 70, 64, 68, 65, 4, 64, 65, 67, 1, 65, 68, 81, 43, 39, 41, 38, + 34, 40, 36, 33, 29, 28, 29, 24, 6, 8, 1, 24, 23, 18, 7, 14, 11, 0, + 66, 69, 75, 78, 91, 77, 97, 62, 62, 59, 59, 54, 50, 47, 41, 40, 36, 33, + 26, 20, 7, 70, 39, 39, 13, 41, 46, 35, 29, 31, 27, 22, 26, 20, 16, 6, + 6, 66, 83, 13, 3, 73, 104, 93, 97, 74, 77, 75, 17, 66, 66, 68, 7, 30, + 17, 36, 0, 55, 52, 41, 32, 19, 15, 75, 84, 99, 72, 41, 29, 20, 9, 10, + 2, 64, 68, 83, 92, 84, 75, 78, 71, 0, 70, 72, 0, 3, 4, 2, 8, 10, + 3, 54, 52, 45, 38, 33, 27, 11, 0, 80, 124, 119, 88, 62, 62, 62, 
62}, + + { + + 58, 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 7, 38, 42, 53, 14, 81, 1, + 65, 9, 0, 2, 69, 67, 82, 11, 34, 93, 106, 113, 81, 84, 68, 65, 9, 0, + 80, 78, 8, 9, 66, 75, 85, 2, 74, 81, 90, 5, 73, 84, 4, 73, 78, 88, + 3, 69, 70, 75, 65, 4, 22, 0, 0, 0, 74, 90, 97, 65, 9, 65, 52, 7, + 69, 110, 89, 82, 67, 86, 100, 96, 103, 77, 100, 93, 97, 106, 103, 106, 104, 12, + 66, 69, 91, 67, 85, 80, 97, 73, 91, 84, 107, 11, 77, 69, 103, 86, 76, 74, + 66, 4, 64, 72, 8, 5, 69, 2, 64, 65, 2, 76, 12, 4, 18, 19, 14, 17, + 17, 14, 15, 11, 6, 16, 14, 76, 95, 85, 88, 78, 82, 78, 68, 76, 76, 74, + 75, 71, 72, 69, 77, 94, 85, 95, 71, 74, 64, 68, 2, 7, 1, 65, 5, 6, + 66, 70, 1, 84, 39, 39, 30, 32, 39, 31, 16, 33, 28, 10, 18, 14, 4, 1, + 68, 13, 7, 9, 10, 6, 2, 10, 4, 64, 3, 65, 68, 0, 89, 56, 58, 54, + 58, 55, 53, 53, 53, 51, 42, 41, 38, 28, 24, 5, 39, 33, 15, 42, 32, 33, + 28, 23, 20, 21, 12, 2, 1, 64, 81, 80, 99, 68, 27, 18, 12, 3, 5, 64, + 70, 73, 82, 1, 41, 27, 21, 17, 15, 3, 68, 71, 76, 80, 23, 13, 7, 2, + 2, 68, 73, 79, 93, 86, 64, 68, 67, 70, 73, 83, 86, 98, 70, 35, 19, 12, + 4, 5, 69, 74, 77, 83, 62, 87, 78, 71, 66, 70, 1, 5, 6, 70, 4, 9, + 6, 78, 71, 71, 9, 8, 12, 9, 20, 22, 18, 7, 13, 16, 14, 72, 0, 73, + 92, 86, 80, 81, 77, 78, 76, 75, 70, 74, 73, 70, 68, 75, 82, 92, 81, 97, + 76, 69, 70, 64, 69, 65, 4, 65, 66, 67, 1, 66, 69, 82, 42, 38, 40, 37, + 32, 39, 35, 32, 28, 27, 28, 23, 5, 6, 64, 22, 21, 16, 5, 12, 9, 64, + 67, 70, 75, 78, 91, 78, 97, 62, 61, 57, 56, 51, 47, 44, 38, 37, 33, 30, + 23, 17, 5, 71, 37, 37, 11, 39, 43, 32, 26, 28, 24, 20, 23, 17, 13, 4, + 3, 68, 85, 11, 1, 75, 103, 92, 96, 74, 77, 75, 18, 66, 66, 68, 7, 31, + 18, 37, 0, 53, 50, 38, 28, 16, 12, 78, 87, 101, 72, 41, 28, 19, 9, 10, + 2, 65, 68, 83, 92, 84, 75, 78, 70, 0, 70, 72, 0, 3, 4, 2, 8, 10, + 2, 52, 50, 43, 36, 31, 25, 8, 65, 81, 124, 118, 88, 62, 62, 62, 62}, + + { + + 57, 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 6, 37, 41, 53, 14, 78, 1, + 64, 10, 0, 2, 68, 68, 83, 11, 33, 94, 107, 113, 78, 82, 68, 
64, 10, 0, + 79, 76, 9, 9, 65, 74, 84, 2, 74, 80, 90, 5, 72, 83, 4, 73, 77, 88, + 4, 68, 69, 74, 64, 4, 22, 0, 0, 0, 73, 90, 97, 64, 9, 65, 52, 7, + 69, 108, 88, 82, 66, 84, 98, 94, 101, 75, 98, 91, 95, 104, 102, 105, 103, 13, + 65, 68, 90, 66, 84, 79, 95, 72, 90, 83, 105, 12, 76, 68, 101, 85, 75, 73, + 65, 5, 0, 71, 9, 6, 69, 2, 0, 65, 2, 75, 12, 4, 18, 19, 14, 17, + 17, 14, 14, 11, 6, 15, 13, 76, 94, 84, 87, 77, 81, 77, 67, 75, 75, 73, + 73, 71, 72, 68, 78, 93, 84, 95, 70, 74, 0, 67, 2, 8, 1, 65, 4, 7, + 66, 71, 1, 84, 38, 39, 30, 32, 39, 31, 16, 33, 28, 9, 18, 14, 4, 1, + 68, 13, 7, 9, 10, 6, 1, 10, 4, 65, 2, 65, 69, 64, 89, 55, 57, 53, + 57, 54, 51, 51, 51, 49, 40, 39, 36, 26, 22, 3, 38, 32, 14, 40, 30, 31, + 26, 21, 18, 19, 10, 0, 1, 65, 82, 80, 99, 67, 28, 19, 13, 4, 5, 64, + 69, 72, 81, 2, 42, 28, 22, 17, 16, 4, 67, 70, 75, 79, 24, 14, 8, 3, + 3, 67, 72, 78, 91, 85, 1, 67, 66, 69, 72, 82, 85, 96, 69, 36, 20, 13, + 5, 6, 68, 73, 76, 82, 62, 86, 77, 70, 66, 69, 1, 6, 7, 69, 5, 10, + 7, 77, 71, 71, 9, 9, 12, 9, 19, 22, 18, 6, 12, 16, 14, 72, 64, 73, + 91, 85, 79, 80, 76, 77, 75, 74, 70, 73, 72, 69, 68, 74, 81, 91, 80, 97, + 76, 68, 70, 0, 69, 65, 5, 65, 66, 66, 2, 66, 69, 82, 42, 38, 40, 37, + 31, 39, 35, 32, 28, 27, 28, 23, 5, 5, 65, 21, 20, 15, 4, 11, 8, 64, + 67, 70, 74, 77, 90, 78, 96, 60, 59, 55, 54, 49, 45, 42, 36, 35, 31, 28, + 21, 15, 4, 71, 36, 36, 10, 38, 41, 30, 24, 26, 22, 18, 21, 15, 11, 3, + 1, 69, 86, 10, 0, 76, 101, 90, 94, 73, 76, 74, 20, 65, 65, 68, 8, 33, + 20, 39, 1, 52, 49, 36, 25, 14, 10, 80, 89, 102, 71, 42, 28, 19, 9, 11, + 2, 65, 68, 82, 91, 83, 74, 77, 68, 1, 69, 71, 1, 4, 5, 3, 9, 11, + 2, 51, 49, 42, 35, 30, 24, 6, 66, 81, 123, 116, 87, 62, 62, 62, 62}, + + { + + 56, 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 5, 36, 40, 53, 14, 76, 1, + 0, 11, 0, 1, 68, 69, 84, 10, 31, 96, 108, 114, 75, 81, 68, 0, 11, 0, + 79, 75, 9, 8, 65, 74, 83, 2, 74, 80, 90, 5, 72, 82, 4, 73, 77, 88, + 4, 68, 69, 74, 64, 4, 22, 0, 0, 0, 72, 90, 97, 64, 8, 65, 52, 7, + 
69, 107, 87, 82, 66, 83, 96, 92, 100, 74, 96, 90, 93, 103, 101, 104, 102, 14, + 65, 67, 89, 66, 84, 78, 93, 72, 90, 83, 104, 12, 76, 68, 100, 85, 74, 73, + 65, 5, 0, 70, 9, 6, 70, 2, 0, 65, 2, 75, 12, 4, 17, 19, 13, 17, + 17, 14, 12, 11, 6, 13, 12, 77, 93, 83, 86, 76, 80, 76, 66, 74, 75, 72, + 72, 71, 72, 68, 79, 93, 83, 95, 70, 74, 1, 67, 1, 9, 1, 65, 3, 8, + 67, 72, 1, 85, 36, 38, 29, 31, 38, 30, 15, 32, 28, 8, 17, 14, 4, 1, + 68, 12, 6, 8, 9, 5, 0, 9, 3, 67, 1, 66, 70, 65, 89, 53, 56, 51, + 55, 52, 49, 49, 49, 46, 38, 37, 33, 23, 20, 0, 36, 30, 12, 38, 28, 29, + 24, 19, 16, 16, 8, 65, 0, 67, 83, 81, 99, 67, 28, 19, 13, 4, 5, 64, + 69, 72, 80, 2, 42, 28, 22, 17, 17, 4, 66, 70, 74, 78, 25, 15, 8, 3, + 4, 67, 72, 77, 90, 84, 2, 66, 66, 69, 71, 81, 84, 95, 69, 36, 20, 13, + 5, 7, 67, 72, 75, 81, 62, 86, 77, 70, 66, 69, 1, 6, 7, 69, 5, 10, + 8, 77, 71, 71, 8, 9, 11, 8, 18, 21, 18, 5, 11, 16, 14, 73, 65, 74, + 90, 84, 78, 80, 76, 77, 74, 73, 70, 73, 72, 69, 69, 74, 81, 91, 79, 97, + 76, 68, 70, 1, 69, 65, 6, 65, 66, 66, 3, 67, 69, 83, 41, 38, 40, 36, + 30, 38, 34, 31, 27, 26, 27, 22, 4, 4, 66, 19, 19, 13, 2, 10, 7, 65, + 68, 70, 74, 77, 90, 79, 95, 58, 57, 53, 52, 46, 43, 40, 33, 33, 29, 26, + 19, 13, 3, 72, 34, 34, 9, 36, 39, 28, 22, 24, 20, 16, 19, 13, 9, 1, + 64, 71, 87, 8, 65, 78, 100, 89, 93, 72, 75, 73, 22, 64, 64, 68, 9, 34, + 21, 40, 2, 51, 47, 33, 22, 11, 7, 83, 91, 103, 71, 42, 28, 19, 9, 11, + 2, 65, 68, 81, 90, 82, 73, 76, 67, 2, 68, 70, 2, 5, 6, 3, 10, 11, + 2, 50, 47, 40, 33, 28, 22, 4, 68, 82, 122, 115, 86, 62, 62, 62, 62}, + + { + + 55, 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 3, 34, 39, 53, 14, 74, 1, + 1, 12, 0, 0, 68, 70, 85, 10, 29, 97, 109, 114, 72, 80, 68, 1, 12, 0, + 78, 74, 10, 8, 65, 73, 82, 1, 75, 80, 90, 5, 72, 82, 4, 73, 77, 88, + 5, 68, 69, 74, 0, 4, 22, 0, 0, 0, 72, 90, 97, 0, 7, 65, 52, 7, + 69, 106, 86, 82, 65, 82, 94, 90, 98, 73, 94, 89, 91, 102, 100, 103, 101, 15, + 65, 66, 88, 66, 83, 78, 91, 72, 89, 82, 103, 12, 76, 67, 98, 84, 73, 73, 
+ 65, 5, 0, 69, 10, 7, 70, 2, 0, 65, 2, 75, 12, 3, 17, 19, 12, 17, + 17, 14, 10, 11, 5, 11, 11, 78, 92, 82, 85, 75, 79, 76, 65, 74, 74, 71, + 71, 71, 72, 68, 80, 92, 82, 95, 70, 74, 2, 67, 0, 10, 1, 65, 2, 9, + 67, 73, 1, 86, 35, 37, 29, 30, 37, 29, 15, 31, 27, 7, 16, 14, 4, 1, + 69, 11, 5, 7, 8, 4, 64, 8, 3, 69, 0, 66, 71, 66, 89, 52, 54, 50, + 53, 50, 47, 47, 47, 44, 35, 35, 31, 21, 17, 65, 34, 28, 11, 36, 26, 26, + 21, 17, 13, 14, 6, 68, 64, 69, 84, 82, 99, 67, 29, 19, 13, 4, 5, 64, + 69, 72, 80, 2, 42, 28, 22, 17, 18, 5, 66, 69, 74, 77, 26, 15, 9, 3, + 5, 66, 71, 77, 89, 83, 3, 65, 65, 69, 70, 80, 83, 94, 68, 36, 20, 13, + 5, 8, 67, 71, 74, 80, 62, 85, 76, 69, 66, 69, 1, 6, 7, 69, 5, 10, + 8, 77, 71, 71, 8, 10, 10, 7, 17, 20, 18, 4, 10, 16, 14, 73, 66, 74, + 89, 83, 77, 79, 75, 76, 74, 72, 70, 73, 72, 68, 69, 74, 81, 90, 78, 97, + 76, 68, 70, 2, 69, 65, 7, 65, 66, 66, 4, 68, 69, 84, 40, 37, 39, 35, + 29, 37, 33, 30, 26, 25, 26, 21, 3, 3, 67, 18, 18, 12, 0, 8, 6, 66, + 68, 71, 74, 77, 90, 79, 94, 56, 55, 51, 50, 43, 41, 38, 31, 31, 27, 24, + 16, 11, 1, 73, 32, 33, 7, 34, 37, 26, 20, 22, 18, 14, 17, 11, 6, 64, + 66, 73, 88, 6, 67, 79, 99, 88, 92, 71, 74, 72, 23, 0, 0, 68, 10, 36, + 23, 42, 3, 49, 46, 31, 19, 8, 4, 86, 93, 104, 71, 42, 28, 19, 9, 11, + 2, 65, 68, 81, 89, 81, 72, 75, 66, 2, 68, 69, 2, 5, 6, 3, 10, 11, + 2, 49, 46, 38, 31, 26, 20, 2, 70, 83, 121, 113, 85, 62, 62, 62, 62}, + + { + + 53, 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 2, 33, 37, 53, 14, 71, 0, + 2, 12, 0, 64, 68, 71, 86, 9, 27, 99, 110, 115, 69, 79, 68, 2, 12, 0, + 78, 73, 10, 7, 65, 73, 82, 1, 75, 79, 90, 5, 72, 81, 3, 74, 77, 88, + 5, 67, 69, 73, 0, 4, 22, 0, 0, 0, 71, 91, 97, 0, 6, 65, 52, 7, + 69, 105, 85, 82, 65, 80, 93, 88, 97, 72, 93, 87, 89, 101, 100, 102, 101, 15, + 65, 65, 87, 66, 83, 77, 89, 72, 89, 82, 102, 12, 75, 67, 97, 84, 73, 73, + 64, 5, 0, 69, 10, 7, 71, 1, 0, 65, 2, 75, 12, 3, 16, 19, 12, 17, + 17, 13, 8, 11, 5, 9, 10, 79, 91, 81, 84, 74, 79, 75, 64, 73, 74, 70, + 70, 71, 72, 
67, 81, 92, 81, 95, 70, 74, 2, 67, 64, 11, 1, 65, 0, 10, + 68, 74, 1, 87, 33, 36, 28, 29, 36, 28, 14, 30, 27, 5, 15, 13, 4, 1, + 69, 10, 5, 6, 8, 3, 65, 7, 2, 71, 64, 67, 72, 67, 89, 50, 53, 48, + 51, 48, 45, 44, 45, 41, 33, 33, 28, 18, 15, 68, 32, 27, 9, 33, 24, 24, + 19, 14, 11, 11, 4, 70, 65, 70, 86, 83, 99, 67, 29, 20, 13, 4, 5, 64, + 69, 72, 79, 3, 43, 28, 22, 17, 19, 5, 65, 69, 73, 77, 27, 16, 9, 3, + 5, 66, 71, 76, 88, 83, 4, 65, 65, 69, 69, 79, 83, 93, 68, 37, 20, 13, + 5, 9, 66, 71, 73, 79, 62, 85, 76, 69, 66, 69, 1, 6, 7, 68, 5, 10, + 9, 77, 71, 71, 7, 10, 9, 6, 16, 19, 18, 2, 9, 15, 14, 74, 67, 75, + 89, 83, 76, 79, 75, 76, 73, 71, 71, 73, 72, 68, 70, 74, 81, 90, 77, 97, + 76, 67, 70, 2, 69, 65, 8, 65, 67, 66, 5, 68, 70, 85, 39, 37, 39, 34, + 28, 36, 32, 29, 25, 24, 25, 20, 2, 2, 68, 16, 16, 10, 65, 7, 5, 67, + 69, 71, 74, 77, 90, 80, 94, 53, 52, 49, 47, 40, 39, 36, 28, 29, 25, 22, + 14, 9, 0, 74, 30, 31, 6, 32, 35, 24, 18, 19, 16, 12, 15, 9, 4, 66, + 68, 75, 89, 4, 69, 81, 98, 87, 91, 71, 73, 71, 25, 1, 1, 68, 11, 37, + 24, 43, 3, 48, 44, 28, 16, 5, 1, 89, 95, 105, 71, 42, 28, 19, 9, 11, + 2, 65, 68, 80, 88, 80, 71, 75, 65, 3, 67, 68, 3, 6, 7, 4, 11, 12, + 2, 47, 44, 36, 29, 24, 18, 0, 72, 84, 120, 112, 85, 62, 62, 62, 62}, + + { + + 52, 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 0, 31, 36, 53, 14, 69, 0, + 3, 13, 0, 64, 67, 72, 87, 9, 25, 101, 111, 115, 66, 77, 68, 3, 13, 0, + 78, 72, 10, 7, 65, 73, 81, 1, 76, 79, 90, 5, 72, 80, 3, 74, 77, 88, + 6, 67, 68, 73, 1, 4, 22, 0, 0, 0, 71, 91, 97, 1, 5, 65, 52, 7, + 69, 104, 84, 82, 64, 79, 91, 86, 95, 71, 91, 86, 87, 100, 99, 101, 100, 16, + 65, 64, 86, 66, 82, 76, 87, 72, 89, 81, 100, 13, 75, 66, 96, 83, 72, 73, + 64, 6, 1, 68, 11, 8, 71, 1, 0, 65, 2, 75, 12, 2, 15, 19, 11, 17, + 17, 13, 6, 11, 4, 8, 9, 80, 90, 80, 83, 73, 78, 74, 0, 72, 73, 69, + 69, 71, 72, 67, 82, 91, 80, 95, 69, 74, 3, 67, 65, 12, 1, 65, 64, 11, + 68, 75, 1, 87, 32, 35, 28, 28, 35, 27, 13, 30, 27, 4, 14, 13, 4, 1, + 70, 10, 4, 5, 7, 2, 66, 7, 
1, 73, 65, 68, 73, 68, 89, 48, 52, 46, + 49, 47, 43, 42, 43, 39, 31, 31, 26, 16, 13, 70, 30, 25, 7, 31, 22, 22, + 17, 12, 9, 8, 2, 73, 66, 72, 87, 83, 99, 67, 29, 20, 13, 4, 5, 64, + 69, 72, 79, 3, 43, 28, 22, 17, 20, 6, 64, 69, 72, 76, 28, 17, 9, 4, + 6, 65, 70, 76, 87, 82, 6, 64, 65, 68, 68, 78, 82, 92, 67, 37, 21, 13, + 5, 10, 65, 70, 72, 78, 62, 85, 76, 68, 66, 69, 1, 6, 7, 68, 5, 11, + 9, 77, 71, 71, 6, 11, 8, 5, 15, 19, 18, 1, 8, 15, 14, 75, 68, 75, + 88, 82, 75, 78, 74, 75, 73, 70, 71, 73, 72, 68, 71, 74, 81, 89, 76, 97, + 76, 67, 70, 3, 69, 65, 9, 65, 67, 66, 6, 69, 70, 85, 38, 37, 38, 34, + 27, 35, 31, 28, 25, 23, 24, 19, 1, 1, 69, 15, 15, 9, 67, 6, 4, 68, + 70, 72, 74, 76, 90, 80, 93, 51, 50, 47, 45, 38, 37, 34, 26, 27, 23, 20, + 12, 7, 65, 75, 28, 29, 5, 30, 33, 22, 16, 17, 14, 10, 13, 7, 1, 68, + 70, 77, 90, 2, 71, 82, 97, 85, 90, 70, 72, 70, 27, 2, 2, 68, 12, 39, + 26, 45, 4, 46, 42, 26, 13, 3, 65, 91, 97, 106, 71, 42, 28, 19, 9, 11, + 2, 65, 68, 80, 87, 79, 70, 74, 64, 4, 67, 67, 4, 7, 8, 4, 11, 12, + 2, 46, 43, 35, 28, 22, 16, 65, 74, 85, 119, 111, 84, 62, 62, 62, 62}, + + { + + 51, 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 64, 30, 35, 53, 14, 67, 0, + 3, 14, 0, 65, 67, 73, 88, 8, 24, 102, 112, 116, 0, 76, 68, 3, 14, 0, + 77, 71, 11, 6, 64, 72, 80, 0, 76, 79, 90, 5, 71, 80, 3, 74, 76, 88, + 6, 67, 68, 73, 1, 4, 22, 0, 0, 0, 70, 91, 97, 1, 5, 66, 52, 7, + 69, 103, 84, 82, 64, 78, 89, 84, 94, 70, 89, 85, 85, 99, 98, 100, 99, 17, + 65, 0, 86, 65, 82, 76, 85, 72, 88, 81, 99, 13, 75, 66, 94, 83, 71, 72, + 64, 6, 1, 67, 11, 8, 72, 1, 0, 65, 2, 75, 12, 2, 15, 19, 10, 17, + 17, 13, 4, 11, 4, 6, 8, 81, 90, 79, 83, 73, 77, 74, 1, 72, 73, 69, + 67, 71, 72, 67, 83, 91, 79, 95, 69, 74, 4, 66, 66, 12, 1, 65, 65, 12, + 69, 76, 1, 88, 30, 34, 27, 28, 34, 27, 13, 29, 26, 3, 13, 13, 4, 1, + 70, 9, 3, 4, 6, 2, 67, 6, 1, 75, 66, 68, 74, 69, 89, 47, 50, 45, + 47, 45, 41, 40, 41, 36, 28, 29, 23, 13, 10, 73, 28, 23, 6, 29, 19, 19, + 14, 10, 6, 6, 64, 75, 67, 74, 88, 84, 99, 66, 
30, 20, 14, 4, 5, 64, + 69, 72, 78, 3, 43, 29, 22, 17, 21, 6, 64, 68, 72, 75, 29, 17, 10, 4, + 7, 65, 70, 75, 86, 81, 7, 0, 64, 68, 68, 78, 81, 90, 67, 37, 21, 13, + 6, 10, 65, 69, 72, 77, 62, 84, 75, 68, 66, 69, 1, 6, 7, 68, 6, 11, + 10, 77, 71, 72, 6, 11, 7, 4, 14, 18, 18, 0, 7, 15, 14, 75, 69, 76, + 87, 81, 74, 78, 74, 75, 72, 70, 71, 72, 71, 67, 71, 74, 81, 89, 75, 97, + 76, 67, 70, 4, 69, 65, 10, 66, 67, 66, 6, 70, 70, 86, 37, 36, 38, 33, + 26, 35, 31, 27, 24, 23, 23, 18, 0, 0, 70, 13, 14, 7, 69, 4, 2, 69, + 70, 72, 74, 76, 89, 81, 92, 49, 48, 45, 43, 35, 35, 32, 23, 25, 20, 18, + 9, 5, 66, 75, 27, 28, 3, 28, 30, 20, 14, 15, 12, 8, 10, 5, 64, 70, + 72, 78, 92, 0, 73, 84, 96, 84, 89, 69, 71, 69, 28, 3, 3, 68, 13, 40, + 27, 46, 5, 45, 41, 23, 10, 0, 67, 94, 99, 108, 70, 42, 28, 19, 9, 11, + 2, 65, 68, 79, 86, 79, 69, 73, 0, 4, 66, 66, 4, 7, 8, 4, 12, 12, + 2, 45, 41, 33, 26, 21, 15, 68, 75, 86, 118, 109, 83, 62, 62, 62, 62}, + + { + + 50, 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 66, 28, 33, 53, 14, 64, 64, + 4, 14, 0, 66, 67, 74, 89, 8, 22, 104, 113, 116, 3, 75, 68, 4, 14, 0, + 77, 70, 11, 6, 64, 72, 80, 0, 77, 78, 90, 5, 71, 79, 2, 74, 76, 88, + 7, 66, 68, 72, 2, 4, 22, 0, 0, 0, 70, 91, 97, 2, 4, 66, 52, 7, + 69, 102, 83, 82, 0, 76, 88, 82, 92, 69, 88, 83, 83, 98, 97, 99, 99, 18, + 65, 1, 85, 65, 81, 75, 83, 72, 88, 80, 98, 13, 74, 65, 93, 82, 71, 72, + 0, 6, 1, 66, 12, 9, 72, 0, 0, 65, 2, 75, 12, 1, 14, 19, 10, 17, + 17, 12, 2, 11, 3, 4, 7, 82, 89, 78, 82, 72, 76, 73, 2, 71, 72, 68, + 66, 71, 72, 66, 84, 90, 78, 95, 69, 74, 4, 66, 67, 13, 1, 65, 67, 13, + 69, 77, 1, 89, 29, 33, 27, 27, 33, 26, 12, 28, 26, 2, 12, 13, 4, 1, + 71, 8, 3, 3, 6, 1, 68, 5, 0, 77, 67, 69, 75, 70, 89, 45, 49, 43, + 45, 43, 39, 37, 39, 34, 26, 27, 21, 11, 8, 75, 26, 22, 4, 26, 17, 17, + 12, 7, 4, 3, 66, 78, 68, 75, 90, 85, 99, 66, 30, 21, 14, 4, 5, 64, + 69, 72, 78, 4, 44, 29, 22, 17, 22, 7, 0, 68, 71, 74, 30, 18, 10, 4, + 8, 64, 69, 75, 85, 81, 8, 0, 64, 68, 67, 77, 81, 89, 66, 38, 21, 13, 
+ 6, 11, 64, 68, 71, 76, 62, 84, 75, 67, 66, 69, 1, 6, 7, 67, 6, 11, + 10, 77, 71, 72, 5, 12, 6, 3, 13, 17, 18, 64, 6, 14, 14, 76, 70, 76, + 86, 81, 73, 77, 73, 74, 72, 69, 71, 72, 71, 67, 72, 74, 81, 88, 74, 97, + 76, 66, 70, 4, 69, 65, 11, 66, 67, 66, 7, 70, 71, 87, 36, 36, 37, 32, + 25, 34, 30, 26, 23, 22, 22, 17, 64, 64, 71, 12, 12, 6, 71, 3, 1, 70, + 71, 73, 74, 76, 89, 81, 91, 47, 46, 43, 40, 32, 33, 30, 21, 23, 18, 16, + 7, 3, 68, 76, 25, 26, 2, 26, 28, 18, 12, 13, 10, 6, 8, 3, 67, 72, + 74, 80, 93, 65, 75, 85, 95, 83, 88, 69, 70, 68, 30, 4, 4, 68, 14, 42, + 29, 48, 5, 43, 39, 21, 7, 66, 70, 97, 101, 109, 70, 42, 28, 19, 9, 11, + 2, 65, 68, 79, 85, 78, 68, 72, 1, 5, 66, 65, 5, 8, 9, 5, 12, 13, + 2, 43, 40, 31, 24, 19, 13, 70, 77, 87, 117, 108, 83, 62, 62, 62, 62}, + + { + + 48, 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 67, 27, 32, 53, 14, 1, 64, + 5, 15, 0, 67, 67, 75, 91, 7, 20, 106, 114, 117, 5, 74, 68, 5, 15, 0, + 77, 69, 11, 5, 64, 72, 79, 64, 77, 78, 91, 5, 71, 79, 2, 75, 76, 88, + 7, 66, 68, 72, 2, 4, 22, 0, 0, 0, 69, 92, 97, 2, 3, 66, 52, 7, + 69, 101, 82, 82, 0, 75, 86, 80, 91, 68, 86, 82, 82, 97, 97, 98, 98, 18, + 65, 2, 84, 65, 81, 75, 82, 72, 88, 80, 97, 13, 74, 65, 92, 82, 70, 72, + 0, 6, 1, 66, 12, 9, 73, 0, 0, 65, 2, 75, 12, 1, 13, 19, 9, 17, + 17, 12, 0, 11, 3, 2, 6, 83, 88, 77, 81, 71, 76, 73, 3, 71, 72, 67, + 65, 71, 72, 66, 86, 90, 77, 95, 69, 75, 5, 66, 68, 14, 1, 65, 68, 14, + 70, 78, 1, 90, 27, 32, 26, 26, 32, 25, 11, 27, 25, 0, 11, 12, 4, 1, + 71, 7, 2, 2, 5, 0, 69, 4, 64, 79, 69, 70, 76, 71, 89, 43, 47, 41, + 43, 41, 37, 35, 37, 31, 23, 25, 18, 8, 5, 78, 24, 20, 2, 24, 15, 14, + 9, 5, 1, 0, 68, 80, 69, 77, 91, 86, 100, 66, 30, 21, 14, 4, 5, 64, + 69, 72, 77, 4, 44, 29, 22, 17, 23, 7, 0, 68, 71, 74, 31, 18, 10, 4, + 8, 64, 69, 74, 84, 80, 9, 1, 64, 68, 66, 76, 80, 88, 66, 38, 21, 13, + 6, 12, 64, 68, 70, 76, 62, 84, 75, 67, 66, 69, 1, 6, 7, 67, 6, 11, + 11, 77, 71, 72, 4, 12, 5, 2, 12, 16, 18, 66, 4, 14, 14, 77, 71, 77, + 86, 80, 72, 77, 73, 
74, 71, 68, 72, 72, 71, 67, 73, 74, 81, 88, 74, 98, + 76, 66, 70, 5, 69, 65, 11, 66, 68, 66, 8, 71, 71, 88, 35, 35, 37, 31, + 23, 33, 29, 25, 22, 21, 21, 16, 65, 65, 72, 10, 11, 4, 73, 1, 0, 71, + 72, 73, 74, 76, 89, 82, 91, 44, 43, 41, 38, 29, 30, 27, 18, 21, 16, 14, + 4, 1, 69, 77, 23, 24, 0, 24, 26, 15, 9, 10, 7, 4, 6, 0, 69, 74, + 77, 82, 94, 67, 77, 87, 94, 82, 87, 68, 69, 68, 31, 5, 4, 68, 14, 43, + 30, 49, 6, 42, 37, 18, 4, 69, 73, 100, 103, 110, 70, 42, 28, 19, 9, 11, + 2, 65, 68, 78, 85, 77, 67, 72, 2, 5, 65, 65, 5, 8, 9, 5, 13, 13, + 1, 42, 38, 29, 22, 17, 11, 72, 79, 88, 117, 107, 82, 62, 62, 62, 62}, + + { + + 47, 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 68, 26, 31, 53, 14, 3, 64, + 6, 16, 0, 67, 66, 76, 92, 6, 18, 107, 115, 118, 8, 72, 68, 6, 16, 0, + 76, 68, 12, 4, 64, 71, 78, 64, 77, 78, 91, 5, 71, 78, 2, 75, 76, 88, + 7, 66, 67, 72, 2, 4, 22, 0, 0, 0, 68, 92, 97, 2, 2, 66, 52, 7, + 69, 100, 81, 82, 0, 74, 84, 78, 89, 66, 84, 81, 80, 96, 96, 97, 97, 19, + 64, 3, 83, 65, 80, 74, 80, 72, 87, 80, 95, 14, 74, 65, 90, 82, 69, 72, + 0, 7, 2, 65, 12, 9, 73, 0, 1, 65, 2, 74, 12, 1, 13, 19, 8, 17, + 17, 12, 65, 11, 3, 1, 5, 83, 87, 76, 80, 70, 75, 72, 4, 70, 72, 66, + 64, 71, 72, 66, 87, 89, 76, 95, 68, 75, 6, 66, 69, 15, 1, 65, 69, 15, + 71, 79, 1, 90, 26, 31, 26, 25, 31, 24, 11, 27, 25, 64, 10, 12, 4, 1, + 71, 7, 1, 1, 4, 64, 70, 4, 64, 80, 70, 70, 77, 72, 89, 42, 46, 40, + 42, 40, 35, 33, 35, 29, 21, 23, 16, 5, 3, 81, 23, 18, 1, 22, 13, 12, + 7, 3, 64, 65, 70, 82, 69, 79, 92, 86, 100, 66, 31, 21, 14, 5, 5, 64, + 68, 72, 76, 4, 44, 29, 23, 17, 24, 8, 1, 67, 70, 73, 32, 19, 11, 5, + 9, 0, 69, 73, 83, 79, 11, 2, 0, 67, 65, 75, 79, 87, 65, 38, 22, 14, + 6, 13, 0, 67, 69, 75, 62, 83, 74, 66, 66, 69, 1, 7, 8, 67, 6, 12, + 12, 77, 71, 72, 4, 12, 4, 2, 11, 16, 18, 67, 3, 14, 14, 77, 72, 78, + 85, 79, 71, 77, 72, 74, 70, 67, 72, 72, 71, 66, 73, 74, 81, 88, 73, 98, + 76, 66, 70, 6, 69, 65, 12, 66, 68, 66, 9, 72, 71, 88, 34, 35, 37, 31, + 22, 32, 28, 24, 22, 20, 20, 16, 
65, 66, 73, 9, 10, 2, 75, 0, 64, 71, + 72, 73, 73, 75, 89, 83, 90, 42, 41, 39, 36, 27, 28, 25, 16, 19, 14, 12, + 2, 64, 70, 78, 21, 23, 64, 22, 24, 13, 7, 8, 5, 2, 4, 65, 71, 75, + 79, 84, 95, 69, 79, 89, 93, 80, 85, 67, 68, 67, 33, 6, 5, 68, 15, 45, + 31, 51, 7, 41, 36, 16, 1, 71, 76, 102, 105, 111, 70, 42, 28, 19, 9, 12, + 2, 65, 68, 77, 84, 76, 66, 71, 4, 6, 64, 64, 6, 9, 10, 5, 14, 13, + 1, 41, 37, 28, 21, 15, 9, 74, 81, 88, 116, 105, 81, 62, 62, 62, 62}, + + { + + 46, 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 70, 24, 29, 53, 14, 6, 65, + 7, 16, 0, 68, 66, 77, 93, 6, 16, 109, 116, 118, 11, 71, 68, 7, 16, 0, + 76, 67, 12, 4, 64, 71, 78, 64, 78, 77, 91, 5, 71, 77, 1, 75, 76, 88, + 8, 65, 67, 71, 3, 4, 22, 0, 0, 0, 68, 92, 97, 3, 1, 66, 52, 7, + 69, 99, 80, 82, 1, 72, 83, 76, 88, 65, 83, 79, 78, 95, 95, 96, 97, 20, + 64, 4, 82, 65, 80, 73, 78, 72, 87, 79, 94, 14, 73, 64, 89, 81, 69, 72, + 1, 7, 2, 64, 13, 10, 74, 64, 1, 65, 2, 74, 12, 0, 12, 19, 8, 17, + 17, 11, 67, 11, 2, 64, 4, 84, 86, 75, 79, 69, 74, 71, 5, 69, 71, 65, + 0, 71, 72, 65, 88, 89, 75, 95, 68, 75, 6, 66, 70, 16, 1, 65, 71, 16, + 71, 80, 1, 91, 24, 30, 25, 24, 30, 23, 10, 26, 25, 65, 9, 12, 4, 1, + 72, 6, 1, 0, 4, 65, 71, 3, 65, 82, 71, 71, 78, 73, 89, 40, 45, 38, + 40, 38, 33, 30, 33, 26, 19, 21, 13, 3, 1, 83, 21, 17, 64, 19, 11, 10, + 5, 0, 66, 68, 72, 85, 70, 80, 94, 87, 100, 66, 31, 22, 14, 5, 5, 64, + 68, 72, 76, 5, 45, 29, 23, 17, 25, 8, 2, 67, 69, 72, 33, 20, 11, 5, + 10, 0, 68, 73, 82, 79, 12, 2, 0, 67, 64, 74, 79, 86, 65, 39, 22, 14, + 6, 14, 1, 66, 68, 74, 62, 83, 74, 66, 66, 69, 1, 7, 8, 66, 6, 12, + 12, 77, 71, 72, 3, 13, 3, 1, 10, 15, 18, 68, 2, 13, 14, 78, 73, 78, + 84, 79, 70, 76, 72, 73, 70, 66, 72, 72, 71, 66, 74, 74, 81, 87, 72, 98, + 76, 65, 70, 6, 69, 65, 13, 66, 68, 66, 10, 72, 72, 89, 33, 35, 36, 30, + 21, 31, 27, 23, 21, 19, 19, 15, 66, 67, 74, 7, 8, 1, 77, 64, 65, 72, + 73, 74, 73, 75, 89, 83, 89, 40, 39, 37, 33, 24, 26, 23, 13, 17, 12, 10, + 0, 66, 72, 79, 19, 21, 65, 20, 22, 11, 
5, 6, 3, 0, 2, 67, 74, 77, + 81, 86, 96, 71, 81, 90, 92, 79, 84, 67, 67, 66, 35, 7, 6, 68, 16, 46, + 33, 52, 7, 39, 34, 13, 65, 74, 79, 105, 107, 112, 70, 42, 28, 19, 9, 12, + 2, 65, 68, 77, 83, 75, 65, 70, 5, 7, 64, 0, 7, 10, 11, 6, 14, 14, + 1, 39, 35, 26, 19, 13, 7, 76, 83, 89, 115, 104, 81, 62, 62, 62, 62}, + + { + + 45, 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 71, 23, 28, 53, 14, 8, 65, + 7, 17, 0, 69, 66, 78, 94, 5, 15, 110, 117, 119, 14, 70, 68, 7, 17, 0, + 75, 66, 13, 3, 0, 70, 77, 65, 78, 77, 91, 5, 70, 77, 1, 75, 75, 88, + 8, 65, 67, 71, 3, 4, 22, 0, 0, 0, 67, 92, 97, 3, 1, 67, 52, 7, + 69, 98, 80, 82, 1, 71, 81, 74, 86, 64, 81, 78, 76, 94, 94, 95, 96, 21, + 64, 5, 82, 64, 79, 73, 76, 72, 86, 79, 93, 14, 73, 64, 87, 81, 68, 71, + 1, 7, 2, 0, 13, 10, 74, 64, 1, 65, 2, 74, 12, 0, 12, 19, 7, 17, + 17, 11, 69, 11, 2, 66, 3, 85, 86, 74, 79, 69, 73, 71, 6, 69, 71, 65, + 2, 71, 72, 65, 89, 88, 74, 95, 68, 75, 7, 65, 71, 16, 1, 65, 72, 17, + 72, 81, 1, 92, 23, 29, 25, 24, 29, 23, 10, 25, 24, 66, 8, 12, 4, 1, + 72, 5, 0, 64, 3, 65, 72, 2, 65, 84, 72, 71, 79, 74, 89, 39, 43, 37, + 38, 36, 31, 28, 31, 24, 16, 19, 11, 0, 65, 86, 19, 15, 65, 17, 8, 7, + 2, 65, 69, 70, 75, 87, 71, 82, 95, 88, 100, 65, 32, 22, 15, 5, 5, 64, + 68, 72, 75, 5, 45, 30, 23, 17, 26, 9, 2, 66, 69, 71, 34, 20, 12, 5, + 11, 1, 68, 72, 81, 78, 13, 3, 1, 67, 64, 74, 78, 84, 64, 39, 22, 14, + 7, 14, 1, 65, 68, 73, 62, 82, 73, 65, 66, 69, 1, 7, 8, 66, 7, 12, + 13, 77, 71, 73, 3, 13, 2, 0, 9, 14, 18, 69, 1, 13, 14, 78, 74, 79, + 83, 78, 69, 76, 71, 73, 69, 66, 72, 71, 70, 65, 74, 74, 81, 87, 71, 98, + 76, 65, 70, 7, 69, 65, 14, 67, 68, 66, 10, 73, 72, 90, 32, 34, 36, 29, + 20, 31, 27, 22, 20, 19, 18, 14, 67, 68, 75, 6, 7, 64, 79, 66, 67, 73, + 73, 74, 73, 75, 88, 84, 88, 38, 37, 35, 31, 21, 24, 21, 11, 15, 9, 8, + 66, 68, 73, 79, 18, 20, 67, 18, 19, 9, 3, 4, 1, 65, 64, 69, 76, 79, + 83, 87, 98, 73, 83, 92, 91, 78, 83, 66, 66, 65, 36, 8, 7, 68, 17, 48, + 34, 54, 8, 38, 33, 11, 68, 77, 81, 108, 109, 114, 
69, 42, 28, 19, 9, 12, + 2, 65, 68, 76, 82, 75, 64, 69, 6, 7, 0, 1, 7, 10, 11, 6, 15, 14, + 1, 38, 34, 24, 17, 12, 6, 79, 84, 90, 114, 102, 80, 62, 62, 62, 62}, + + { + + 43, 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 73, 21, 27, 53, 14, 10, 65, + 8, 18, 0, 70, 66, 79, 95, 5, 13, 112, 118, 119, 17, 69, 68, 8, 18, 0, + 75, 65, 13, 3, 0, 70, 76, 65, 79, 77, 91, 5, 70, 76, 1, 76, 75, 88, + 9, 65, 67, 71, 4, 4, 22, 0, 0, 0, 67, 93, 97, 4, 0, 67, 52, 7, + 69, 97, 79, 82, 2, 70, 79, 72, 85, 0, 79, 77, 74, 93, 94, 94, 95, 21, + 64, 6, 81, 64, 79, 72, 74, 72, 86, 78, 92, 14, 73, 0, 86, 80, 67, 71, + 1, 7, 2, 0, 14, 11, 75, 64, 1, 65, 2, 74, 12, 64, 11, 19, 6, 17, + 17, 11, 71, 11, 1, 68, 2, 86, 85, 73, 78, 68, 73, 70, 7, 68, 70, 64, + 3, 71, 72, 65, 90, 88, 73, 95, 68, 75, 8, 65, 72, 17, 1, 65, 73, 18, + 72, 82, 1, 93, 21, 28, 24, 23, 28, 22, 9, 24, 24, 68, 7, 11, 4, 1, + 73, 4, 64, 65, 2, 66, 73, 1, 66, 86, 73, 72, 80, 75, 89, 37, 42, 35, + 36, 34, 29, 26, 29, 21, 14, 17, 8, 65, 67, 88, 17, 13, 67, 15, 6, 5, + 0, 67, 71, 73, 77, 90, 72, 84, 96, 89, 100, 65, 32, 22, 15, 5, 5, 64, + 68, 72, 75, 5, 45, 30, 23, 17, 27, 9, 3, 66, 68, 71, 35, 21, 12, 5, + 11, 1, 67, 72, 80, 77, 14, 4, 1, 67, 0, 73, 77, 83, 64, 39, 22, 14, + 7, 15, 2, 65, 67, 72, 62, 82, 73, 65, 66, 69, 1, 7, 8, 66, 7, 12, + 13, 77, 71, 73, 2, 14, 1, 64, 8, 13, 18, 71, 0, 13, 14, 79, 75, 79, + 83, 77, 68, 75, 71, 72, 69, 65, 73, 71, 70, 65, 75, 74, 81, 86, 70, 98, + 76, 65, 70, 8, 69, 65, 15, 67, 69, 66, 11, 74, 72, 91, 31, 34, 35, 28, + 19, 30, 26, 21, 19, 18, 17, 13, 68, 69, 76, 4, 6, 65, 81, 67, 68, 74, + 74, 75, 73, 75, 88, 84, 88, 35, 34, 33, 29, 18, 22, 19, 8, 13, 7, 6, + 68, 70, 75, 80, 16, 18, 68, 16, 17, 7, 1, 1, 64, 67, 66, 71, 79, 81, + 85, 89, 99, 75, 85, 93, 90, 77, 82, 65, 65, 64, 38, 9, 8, 68, 18, 49, + 36, 55, 9, 36, 31, 8, 71, 80, 84, 111, 111, 115, 69, 42, 28, 19, 9, 12, + 2, 65, 68, 76, 81, 74, 0, 69, 7, 8, 0, 2, 8, 11, 12, 6, 15, 14, + 1, 37, 32, 22, 15, 10, 4, 81, 86, 91, 113, 101, 79, 62, 62, 62, 
62}, + + { + + 42, 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 74, 20, 25, 53, 14, 13, 66, + 9, 18, 0, 70, 65, 80, 96, 4, 11, 114, 119, 120, 20, 67, 68, 9, 18, 0, + 75, 64, 13, 2, 0, 70, 76, 65, 79, 76, 91, 5, 70, 75, 0, 76, 75, 88, + 9, 64, 66, 70, 4, 4, 22, 0, 0, 0, 66, 93, 97, 4, 64, 67, 52, 7, + 69, 96, 78, 82, 2, 68, 78, 70, 83, 1, 78, 75, 72, 92, 93, 93, 95, 22, + 64, 7, 80, 64, 78, 71, 72, 72, 86, 78, 90, 15, 72, 0, 85, 80, 67, 71, + 2, 8, 3, 1, 14, 11, 75, 65, 1, 65, 2, 74, 12, 64, 10, 19, 6, 17, + 17, 10, 73, 11, 1, 69, 1, 87, 84, 72, 77, 67, 72, 69, 8, 67, 70, 0, + 4, 71, 72, 64, 91, 87, 72, 95, 67, 75, 8, 65, 73, 18, 1, 65, 75, 19, + 73, 83, 1, 93, 20, 27, 24, 22, 27, 21, 8, 24, 24, 69, 6, 11, 4, 1, + 73, 4, 64, 66, 2, 67, 74, 1, 67, 88, 74, 73, 81, 76, 89, 35, 41, 33, + 34, 33, 27, 23, 27, 19, 12, 15, 6, 68, 69, 91, 15, 12, 69, 12, 4, 3, + 65, 70, 73, 76, 79, 92, 73, 85, 98, 89, 100, 65, 32, 23, 15, 5, 5, 64, + 68, 72, 74, 6, 46, 30, 23, 17, 28, 10, 4, 66, 67, 70, 36, 22, 12, 6, + 12, 2, 67, 71, 79, 77, 16, 4, 1, 66, 1, 72, 77, 82, 0, 40, 23, 14, + 7, 16, 3, 64, 66, 71, 62, 82, 73, 64, 66, 69, 1, 7, 8, 65, 7, 13, + 14, 77, 71, 73, 1, 14, 0, 65, 7, 13, 18, 72, 64, 12, 14, 80, 76, 80, + 82, 77, 67, 75, 70, 72, 68, 64, 73, 71, 70, 65, 76, 74, 81, 86, 69, 98, + 76, 64, 70, 8, 69, 65, 16, 67, 69, 66, 12, 74, 73, 91, 30, 34, 35, 28, + 18, 29, 25, 20, 19, 17, 16, 12, 69, 70, 77, 3, 4, 67, 83, 68, 69, 75, + 75, 75, 73, 74, 88, 85, 87, 33, 32, 31, 26, 16, 20, 17, 6, 11, 5, 4, + 70, 72, 76, 81, 14, 16, 69, 14, 15, 5, 64, 64, 66, 69, 68, 73, 81, 83, + 87, 91, 100, 77, 87, 95, 89, 75, 81, 65, 64, 0, 40, 10, 9, 68, 19, 51, + 37, 57, 9, 35, 29, 6, 74, 82, 87, 113, 113, 116, 69, 42, 28, 19, 9, 12, + 2, 65, 68, 75, 80, 73, 1, 68, 8, 9, 1, 3, 9, 12, 13, 7, 16, 15, + 1, 35, 31, 21, 14, 8, 2, 83, 88, 92, 112, 100, 79, 62, 62, 62, 62}, + + { + + 41, 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 76, 18, 24, 53, 14, 15, 66, + 10, 19, 0, 71, 65, 81, 97, 4, 9, 115, 120, 120, 23, 66, 68, 10, 
19, 0, + 74, 0, 14, 2, 0, 69, 75, 66, 80, 76, 91, 5, 70, 75, 0, 76, 75, 88, + 10, 64, 66, 70, 5, 4, 22, 0, 0, 0, 66, 93, 97, 5, 65, 67, 52, 7, + 69, 95, 77, 82, 3, 67, 76, 68, 82, 2, 76, 74, 70, 91, 92, 92, 94, 23, + 64, 8, 79, 64, 78, 71, 70, 72, 85, 77, 89, 15, 72, 1, 83, 79, 66, 71, + 2, 8, 3, 2, 15, 12, 76, 65, 1, 65, 2, 74, 12, 65, 10, 19, 5, 17, + 17, 10, 75, 11, 0, 71, 0, 88, 83, 71, 76, 66, 71, 69, 9, 67, 69, 1, + 5, 71, 72, 64, 92, 87, 71, 95, 67, 75, 9, 65, 74, 19, 1, 65, 76, 20, + 73, 84, 1, 94, 18, 26, 23, 21, 26, 20, 8, 23, 23, 70, 5, 11, 4, 1, + 74, 3, 65, 67, 1, 68, 75, 0, 67, 90, 75, 73, 82, 77, 89, 34, 39, 32, + 32, 31, 25, 21, 25, 16, 9, 13, 3, 70, 72, 93, 13, 10, 70, 10, 2, 0, + 68, 72, 76, 78, 81, 95, 74, 87, 99, 90, 100, 65, 33, 23, 15, 5, 5, 64, + 68, 72, 74, 6, 46, 30, 23, 17, 29, 10, 4, 65, 67, 69, 37, 22, 13, 6, + 13, 2, 66, 71, 78, 76, 17, 5, 2, 66, 2, 71, 76, 81, 0, 40, 23, 14, + 7, 17, 3, 0, 65, 70, 62, 81, 72, 64, 66, 69, 1, 7, 8, 65, 7, 13, + 14, 77, 71, 73, 1, 15, 64, 66, 6, 12, 18, 73, 65, 12, 14, 80, 77, 80, + 81, 76, 66, 74, 70, 71, 68, 0, 73, 71, 70, 64, 76, 74, 81, 85, 68, 98, + 76, 64, 70, 9, 69, 65, 17, 67, 69, 66, 13, 75, 73, 92, 29, 33, 34, 27, + 17, 28, 24, 19, 18, 16, 15, 11, 70, 71, 78, 1, 3, 68, 85, 70, 70, 76, + 75, 76, 73, 74, 88, 85, 86, 31, 30, 29, 24, 13, 18, 15, 3, 9, 3, 2, + 73, 74, 78, 82, 12, 15, 71, 12, 13, 3, 66, 66, 68, 71, 70, 75, 84, 85, + 89, 93, 101, 79, 89, 96, 88, 74, 80, 64, 0, 1, 41, 11, 10, 68, 20, 52, + 39, 58, 10, 33, 28, 3, 77, 85, 90, 116, 115, 117, 69, 42, 28, 19, 9, 12, + 2, 65, 68, 75, 79, 72, 2, 67, 9, 9, 1, 4, 9, 12, 13, 7, 16, 15, + 1, 34, 29, 19, 12, 6, 0, 85, 90, 93, 111, 98, 78, 62, 62, 62, 62}, + + { + + 40, 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 77, 17, 23, 53, 14, 17, 66, + 11, 20, 0, 72, 65, 82, 98, 3, 7, 117, 121, 121, 26, 65, 68, 11, 20, 0, + 74, 1, 14, 1, 0, 69, 74, 66, 80, 76, 91, 5, 70, 74, 0, 76, 75, 88, + 10, 64, 66, 70, 5, 4, 22, 0, 0, 0, 65, 93, 97, 5, 66, 67, 52, 7, + 69, 94, 
76, 82, 3, 66, 74, 66, 80, 3, 74, 73, 68, 90, 91, 91, 93, 24, + 64, 9, 78, 64, 77, 70, 68, 72, 85, 77, 88, 15, 72, 1, 82, 79, 65, 71, + 2, 8, 3, 3, 15, 12, 76, 65, 1, 65, 2, 74, 12, 65, 9, 19, 4, 17, + 17, 10, 77, 11, 0, 73, 64, 89, 82, 70, 75, 65, 70, 68, 10, 66, 69, 2, + 6, 71, 72, 64, 93, 86, 70, 95, 67, 75, 10, 65, 75, 20, 1, 65, 77, 21, + 74, 85, 1, 95, 17, 25, 23, 20, 25, 19, 7, 22, 23, 71, 4, 11, 4, 1, + 74, 2, 66, 68, 0, 69, 76, 64, 68, 92, 76, 74, 83, 78, 89, 32, 38, 30, + 30, 29, 23, 19, 23, 14, 7, 11, 1, 73, 74, 96, 11, 8, 72, 8, 0, 65, + 70, 74, 78, 81, 83, 97, 75, 89, 100, 91, 100, 65, 33, 23, 15, 5, 5, 64, + 68, 72, 73, 6, 46, 30, 23, 17, 30, 11, 5, 65, 66, 68, 38, 23, 13, 6, + 14, 3, 66, 70, 77, 75, 18, 6, 2, 66, 3, 70, 75, 80, 1, 40, 23, 14, + 7, 18, 4, 1, 64, 69, 62, 81, 72, 0, 66, 69, 1, 7, 8, 65, 7, 13, + 15, 77, 71, 73, 0, 15, 65, 67, 5, 11, 18, 74, 66, 12, 14, 81, 78, 81, + 80, 75, 65, 74, 69, 71, 67, 1, 73, 71, 70, 64, 77, 74, 81, 85, 67, 98, + 76, 64, 70, 10, 69, 65, 18, 67, 69, 66, 14, 76, 73, 93, 28, 33, 34, 26, + 16, 27, 23, 18, 17, 15, 14, 10, 71, 72, 79, 0, 2, 70, 87, 71, 71, 77, + 76, 76, 73, 74, 88, 86, 85, 29, 28, 27, 22, 10, 16, 13, 1, 7, 1, 0, + 75, 76, 79, 83, 10, 13, 72, 10, 11, 1, 68, 68, 70, 73, 72, 77, 86, 87, + 91, 95, 102, 81, 91, 98, 87, 73, 79, 0, 1, 2, 43, 12, 11, 68, 21, 54, + 40, 60, 11, 32, 26, 1, 80, 88, 93, 119, 117, 118, 69, 42, 28, 19, 9, 12, + 2, 65, 68, 74, 78, 71, 3, 66, 10, 10, 2, 5, 10, 13, 14, 7, 17, 15, + 1, 33, 28, 17, 10, 4, 65, 87, 92, 94, 110, 97, 77, 62, 62, 62, 62}, + + { + + 38, 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 79, 15, 21, 52, 14, 19, 67, + 11, 20, 64, 73, 65, 84, 100, 2, 5, 119, 122, 122, 28, 64, 69, 11, 20, 64, + 74, 2, 14, 0, 0, 69, 74, 67, 81, 76, 92, 5, 70, 74, 64, 77, 75, 88, + 10, 64, 66, 70, 5, 3, 22, 0, 0, 0, 65, 94, 97, 5, 67, 68, 52, 6, + 69, 93, 76, 82, 3, 65, 73, 65, 79, 4, 73, 72, 67, 89, 91, 90, 93, 24, + 64, 9, 78, 64, 77, 70, 67, 72, 85, 77, 87, 15, 72, 1, 81, 79, 65, 71, + 2, 8, 
3, 3, 15, 12, 77, 66, 1, 66, 2, 74, 11, 66, 8, 19, 3, 16, + 17, 9, 79, 10, 64, 75, 65, 90, 82, 70, 75, 65, 70, 68, 11, 66, 69, 2, + 7, 72, 72, 64, 95, 86, 70, 95, 67, 76, 10, 65, 76, 20, 1, 65, 79, 21, + 75, 86, 1, 96, 15, 24, 22, 19, 24, 18, 6, 21, 22, 73, 3, 10, 3, 1, + 75, 1, 67, 69, 64, 70, 77, 65, 69, 94, 78, 75, 85, 80, 89, 30, 36, 28, + 28, 27, 20, 16, 20, 11, 4, 8, 65, 76, 77, 99, 9, 6, 74, 5, 66, 68, + 73, 77, 81, 84, 86, 100, 76, 91, 102, 92, 101, 65, 33, 23, 15, 5, 5, 65, + 68, 72, 73, 6, 46, 30, 23, 17, 31, 11, 5, 65, 66, 68, 38, 23, 13, 6, + 14, 3, 66, 70, 76, 75, 19, 6, 2, 66, 3, 70, 75, 79, 1, 40, 23, 14, + 7, 18, 4, 1, 64, 69, 62, 81, 72, 0, 66, 69, 1, 7, 8, 65, 7, 13, + 15, 77, 71, 74, 64, 15, 66, 68, 4, 10, 17, 76, 68, 11, 13, 82, 80, 82, + 80, 75, 64, 74, 69, 71, 67, 1, 74, 71, 70, 64, 78, 74, 81, 85, 67, 99, + 76, 64, 70, 10, 70, 65, 18, 68, 70, 66, 14, 77, 74, 94, 27, 32, 33, 25, + 14, 26, 22, 17, 16, 14, 13, 9, 72, 74, 81, 65, 0, 72, 89, 73, 73, 78, + 77, 77, 73, 74, 88, 87, 85, 26, 25, 25, 19, 7, 13, 10, 65, 4, 65, 66, + 78, 79, 81, 84, 8, 11, 74, 8, 8, 65, 71, 71, 73, 75, 75, 80, 89, 89, + 94, 97, 104, 83, 93, 100, 86, 72, 78, 0, 1, 2, 44, 12, 11, 68, 21, 55, + 41, 61, 11, 30, 24, 65, 84, 91, 96, 122, 120, 120, 69, 42, 27, 18, 9, 12, + 2, 66, 68, 74, 78, 71, 3, 66, 11, 10, 2, 5, 10, 13, 14, 7, 17, 15, + 0, 31, 26, 15, 8, 2, 67, 90, 94, 95, 110, 96, 77, 62, 62, 62, 62}, + + { + + 37, 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 80, 14, 20, 52, 14, 22, 67, + 12, 21, 64, 73, 64, 85, 101, 2, 4, 120, 123, 122, 31, 1, 69, 12, 21, 64, + 73, 4, 15, 0, 1, 68, 73, 67, 81, 75, 92, 5, 69, 73, 64, 77, 74, 88, + 11, 0, 65, 69, 6, 3, 22, 0, 0, 0, 64, 94, 97, 6, 67, 68, 52, 6, + 69, 91, 75, 82, 4, 0, 71, 0, 77, 6, 71, 70, 65, 87, 90, 89, 92, 25, + 0, 10, 77, 0, 76, 69, 65, 71, 84, 76, 85, 16, 71, 2, 79, 78, 64, 70, + 3, 9, 4, 4, 16, 13, 77, 66, 2, 66, 2, 73, 11, 66, 8, 19, 3, 16, + 17, 9, 80, 10, 64, 76, 66, 90, 81, 69, 74, 64, 69, 67, 12, 65, 68, 3, + 9, 72, 72, 
0, 96, 85, 69, 95, 66, 76, 11, 64, 76, 21, 1, 65, 80, 22, + 75, 87, 1, 96, 14, 24, 22, 19, 24, 18, 6, 21, 22, 74, 3, 10, 3, 1, + 75, 1, 67, 69, 64, 70, 78, 65, 69, 95, 79, 75, 86, 81, 89, 29, 35, 27, + 27, 26, 18, 14, 18, 9, 2, 6, 67, 78, 79, 101, 8, 5, 75, 3, 68, 70, + 75, 79, 83, 86, 88, 102, 76, 92, 103, 92, 101, 64, 34, 24, 16, 6, 5, 65, + 67, 71, 72, 7, 47, 31, 24, 17, 32, 12, 6, 64, 65, 67, 39, 24, 14, 7, + 15, 4, 65, 69, 74, 74, 21, 7, 3, 65, 4, 69, 74, 77, 2, 41, 24, 15, + 8, 19, 5, 2, 0, 68, 62, 80, 71, 1, 66, 68, 1, 8, 9, 64, 8, 14, + 16, 76, 71, 74, 64, 16, 66, 68, 3, 10, 17, 77, 69, 11, 13, 82, 81, 82, + 79, 74, 0, 73, 68, 70, 66, 2, 74, 70, 69, 0, 78, 73, 80, 84, 66, 99, + 76, 0, 70, 11, 70, 65, 19, 68, 70, 65, 15, 77, 74, 94, 27, 32, 33, 25, + 13, 26, 22, 17, 16, 14, 13, 9, 72, 75, 82, 66, 64, 73, 90, 74, 74, 78, + 77, 77, 72, 73, 87, 87, 84, 24, 23, 23, 17, 5, 11, 8, 67, 2, 67, 68, + 80, 81, 82, 84, 7, 10, 75, 7, 6, 67, 73, 73, 75, 77, 77, 82, 91, 90, + 96, 98, 105, 84, 94, 101, 84, 70, 76, 1, 2, 3, 46, 13, 12, 68, 22, 57, + 43, 62, 12, 29, 23, 67, 87, 93, 98, 124, 122, 121, 68, 43, 27, 18, 9, 13, + 2, 66, 68, 73, 77, 70, 4, 65, 13, 11, 3, 6, 11, 14, 15, 8, 18, 16, + 0, 30, 25, 14, 7, 1, 68, 92, 95, 95, 109, 94, 76, 62, 62, 62, 62}, + + { + + 36, 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 81, 13, 19, 52, 14, 24, 67, + 13, 22, 64, 74, 64, 86, 102, 1, 2, 122, 124, 123, 34, 2, 69, 13, 22, 64, + 73, 5, 15, 64, 1, 68, 72, 67, 81, 75, 92, 5, 69, 72, 64, 77, 74, 88, + 11, 0, 65, 69, 6, 3, 22, 0, 0, 0, 0, 94, 97, 6, 68, 68, 52, 6, + 69, 90, 74, 82, 4, 1, 69, 2, 76, 7, 69, 69, 0, 86, 89, 88, 91, 26, + 0, 11, 76, 0, 76, 68, 0, 71, 84, 76, 84, 16, 71, 2, 78, 78, 0, 70, + 3, 9, 4, 5, 16, 13, 78, 66, 2, 66, 2, 73, 11, 66, 7, 19, 2, 16, + 17, 9, 82, 10, 64, 78, 67, 91, 80, 68, 73, 0, 68, 66, 13, 64, 68, 4, + 10, 72, 72, 0, 97, 85, 68, 95, 66, 76, 12, 64, 77, 22, 1, 65, 81, 23, + 76, 88, 1, 97, 12, 23, 21, 18, 23, 17, 5, 20, 22, 75, 2, 10, 3, 1, + 75, 0, 68, 70, 65, 71, 
79, 66, 70, 97, 80, 76, 87, 82, 89, 27, 34, 25, + 25, 24, 16, 12, 16, 6, 0, 4, 70, 81, 81, 104, 6, 3, 77, 1, 70, 72, + 77, 81, 85, 89, 90, 104, 77, 94, 104, 93, 101, 64, 34, 24, 16, 6, 5, 65, + 67, 71, 71, 7, 47, 31, 24, 17, 33, 12, 7, 64, 64, 66, 40, 25, 14, 7, + 16, 4, 65, 68, 73, 73, 22, 8, 3, 65, 5, 68, 73, 76, 2, 41, 24, 15, + 8, 20, 6, 3, 1, 67, 62, 80, 71, 1, 66, 68, 1, 8, 9, 64, 8, 14, + 17, 76, 71, 74, 65, 16, 67, 69, 2, 9, 17, 78, 70, 11, 13, 83, 82, 83, + 78, 73, 1, 73, 68, 70, 65, 3, 74, 70, 69, 0, 79, 73, 80, 84, 65, 99, + 76, 0, 70, 12, 70, 65, 20, 68, 70, 65, 16, 78, 74, 95, 26, 32, 33, 24, + 12, 25, 21, 16, 15, 13, 12, 8, 73, 76, 83, 68, 65, 75, 92, 75, 75, 79, + 78, 77, 72, 73, 87, 88, 83, 22, 21, 21, 15, 2, 9, 6, 70, 0, 69, 70, + 82, 83, 83, 85, 5, 8, 76, 5, 4, 69, 75, 75, 77, 79, 79, 84, 93, 92, + 98, 100, 106, 86, 96, 103, 83, 69, 75, 2, 3, 4, 48, 14, 13, 68, 23, 58, + 44, 62, 13, 28, 21, 70, 90, 96, 101, 126, 124, 122, 68, 43, 27, 18, 9, 13, + 2, 66, 68, 72, 76, 69, 5, 64, 14, 12, 4, 7, 12, 15, 16, 8, 19, 16, + 0, 29, 23, 12, 5, 64, 70, 94, 97, 96, 108, 93, 75, 62, 62, 62, 62}, + + { + + 35, 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 83, 11, 18, 52, 14, 26, 67, + 14, 23, 64, 75, 64, 87, 103, 1, 0, 123, 125, 123, 37, 3, 69, 14, 23, 64, + 72, 6, 16, 64, 1, 67, 71, 68, 82, 75, 92, 5, 69, 72, 64, 77, 74, 88, + 12, 0, 65, 69, 7, 3, 22, 0, 0, 0, 0, 94, 97, 7, 69, 68, 52, 6, + 69, 89, 73, 82, 5, 2, 67, 4, 74, 8, 67, 68, 2, 85, 88, 87, 90, 27, + 0, 12, 75, 0, 75, 68, 2, 71, 83, 75, 83, 16, 71, 3, 76, 77, 1, 70, + 3, 9, 4, 6, 17, 14, 78, 66, 2, 66, 2, 73, 11, 67, 7, 19, 1, 16, + 17, 9, 84, 10, 65, 80, 68, 92, 79, 67, 72, 1, 67, 66, 14, 64, 67, 5, + 11, 72, 72, 0, 98, 84, 67, 95, 66, 76, 13, 64, 78, 23, 1, 65, 82, 24, + 76, 89, 1, 98, 11, 22, 21, 17, 22, 16, 5, 19, 21, 76, 1, 10, 3, 1, + 76, 64, 69, 71, 66, 72, 80, 67, 70, 99, 81, 76, 88, 83, 89, 26, 32, 24, + 23, 22, 14, 10, 14, 4, 66, 2, 72, 83, 84, 106, 4, 1, 78, 64, 72, 75, + 80, 83, 88, 91, 92, 107, 78, 
96, 105, 94, 101, 64, 35, 24, 16, 6, 5, 65, + 67, 71, 71, 7, 47, 31, 24, 17, 34, 13, 7, 0, 64, 65, 41, 25, 15, 7, + 17, 5, 64, 68, 72, 72, 23, 9, 4, 65, 6, 67, 72, 75, 3, 41, 24, 15, + 8, 21, 6, 4, 2, 66, 62, 79, 70, 2, 66, 68, 1, 8, 9, 64, 8, 14, + 17, 76, 71, 74, 65, 17, 68, 70, 1, 8, 17, 79, 71, 11, 13, 83, 83, 83, + 77, 72, 2, 72, 67, 69, 65, 4, 74, 70, 69, 1, 79, 73, 80, 83, 64, 99, + 76, 0, 70, 13, 70, 65, 21, 68, 70, 65, 17, 79, 74, 96, 25, 31, 32, 23, + 11, 24, 20, 15, 14, 12, 11, 7, 74, 77, 84, 69, 66, 76, 94, 77, 76, 80, + 78, 78, 72, 73, 87, 88, 82, 20, 19, 19, 13, 64, 7, 4, 72, 65, 71, 72, + 85, 85, 85, 86, 3, 7, 78, 3, 2, 71, 77, 77, 79, 81, 81, 86, 96, 94, + 100, 102, 107, 88, 98, 104, 82, 68, 74, 3, 4, 5, 49, 15, 14, 68, 24, 60, + 46, 62, 14, 26, 20, 72, 93, 99, 104, 126, 126, 123, 68, 43, 27, 18, 9, 13, + 2, 66, 68, 72, 75, 68, 6, 0, 15, 12, 4, 8, 12, 15, 16, 8, 19, 16, + 0, 28, 22, 10, 3, 66, 72, 96, 99, 97, 107, 91, 74, 62, 62, 62, 62}, + + { + + 33, 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 84, 10, 16, 52, 14, + 29, 68, 15, 23, 64, 76, 64, 88, 104, 0, 65, 125, 126, 124, 40, 4, + 69, 15, 23, 64, 72, 7, 16, 65, 1, 67, 71, 68, 82, 74, 92, 5, + 69, 71, 65, 78, 74, 88, 12, 1, 65, 68, 7, 3, 22, 0, 0, 0, + 1, 95, 97, 7, 70, 68, 52, 6, 69, 88, 72, 82, 5, 4, 66, 6, + 73, 9, 66, 66, 4, 84, 88, 86, 90, 27, 0, 13, 74, 0, 75, 67, + 4, 71, 83, 75, 82, 16, 70, 3, 75, 77, 1, 70, 4, 9, 4, 6, + 17, 14, 79, 67, 2, 66, 2, 73, 11, 67, 6, 19, 1, 16, 17, 8, + 86, 10, 65, 82, 69, 93, 78, 66, 71, 2, 67, 65, 15, 0, 67, 6, + 12, 72, 72, 1, 99, 84, 66, 95, 66, 76, 13, 64, 79, 24, 1, 65, + 84, 25, 77, 90, 1, 99, 9, 21, 20, 16, 21, 15, 4, 18, 21, 78, + 0, 9, 3, 1, 76, 65, 69, 72, 66, 73, 81, 68, 71, 101, 82, 77, + 89, 84, 89, 24, 31, 22, 21, 20, 12, 7, 12, 1, 68, 0, 75, 86, + 86, 109, 2, 0, 80, 67, 74, 77, 82, 86, 90, 94, 94, 109, 79, 97, + 107, 95, 101, 64, 35, 25, 16, 6, 5, 65, 67, 71, 70, 8, 48, 31, + 24, 17, 35, 13, 8, 0, 0, 65, 42, 26, 15, 7, 17, 5, 64, 67, + 71, 72, 24, 9, 
4, 65, 7, 66, 72, 74, 3, 42, 24, 15, 8, 22, + 7, 4, 3, 65, 62, 79, 70, 2, 66, 68, 1, 8, 9, 0, 8, 14, + 18, 76, 71, 74, 66, 17, 69, 71, 0, 7, 17, 81, 72, 10, 13, 84, + 84, 84, 77, 72, 3, 72, 67, 69, 64, 5, 75, 70, 69, 1, 80, 73, + 80, 83, 0, 99, 76, 1, 70, 13, 70, 65, 22, 68, 71, 65, 18, 79, + 75, 97, 24, 31, 32, 22, 10, 23, 19, 14, 13, 11, 10, 6, 75, 78, + 85, 71, 68, 78, 96, 78, 77, 81, 79, 78, 72, 73, 87, 89, 82, 17, + 16, 17, 10, 67, 5, 2, 75, 67, 73, 74, 87, 87, 86, 87, 1, 5, + 79, 1, 0, 73, 79, 80, 81, 83, 83, 88, 98, 96, 102, 104, 108, 90, + 100, 106, 81, 67, 73, 3, 5, 6, 51, 16, 15, 68, 25, 61, 47, 62, + 14, 25, 18, 75, 96, 102, 107, 126, 126, 124, 68, 43, 27, 18, 9, 13, + 2, 66, 68, 71, 74, 67, 7, 0, 16, 13, 5, 9, 13, 16, 17, 9, + 20, 17, 0, 26, 20, 8, 1, 68, 74, 98, 101, 98, 106, 90, 74, 62, + 62, 62, 62}, + + { + + 32, 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 86, 8, 15, 52, 14, + 31, 68, 16, 24, 64, 76, 0, 89, 105, 0, 67, 126, 126, 124, 43, 6, + 69, 16, 24, 64, 72, 8, 16, 65, 1, 67, 70, 68, 83, 74, 92, 5, + 69, 70, 65, 78, 74, 88, 13, 1, 64, 68, 8, 3, 22, 0, 0, 0, + 1, 95, 97, 8, 71, 68, 52, 6, 69, 87, 71, 82, 6, 5, 64, 8, + 71, 10, 64, 65, 6, 83, 87, 85, 89, 28, 0, 14, 73, 0, 74, 66, + 6, 71, 83, 74, 80, 17, 70, 4, 74, 76, 2, 70, 4, 10, 5, 7, + 18, 15, 79, 67, 2, 66, 2, 73, 11, 68, 5, 19, 0, 16, 17, 8, + 88, 10, 66, 83, 70, 94, 77, 65, 70, 3, 66, 64, 16, 1, 66, 7, + 13, 72, 72, 1, 100, 83, 65, 95, 65, 76, 14, 64, 80, 25, 1, 65, + 85, 26, 77, 91, 1, 99, 8, 20, 20, 15, 20, 14, 3, 18, 21, 79, + 64, 9, 3, 1, 77, 65, 70, 73, 67, 74, 82, 68, 72, 103, 83, 78, + 90, 85, 89, 22, 30, 20, 19, 19, 10, 5, 10, 64, 70, 65, 77, 88, + 88, 111, 0, 65, 82, 69, 76, 79, 84, 88, 92, 97, 96, 112, 80, 99, + 108, 95, 101, 64, 35, 25, 16, 6, 5, 65, 67, 71, 70, 8, 48, 31, + 24, 17, 36, 14, 9, 0, 1, 64, 43, 27, 15, 8, 18, 6, 0, 67, + 70, 71, 26, 10, 4, 64, 8, 65, 71, 73, 4, 42, 25, 15, 8, 23, + 8, 5, 4, 64, 62, 79, 70, 3, 66, 68, 1, 8, 9, 0, 8, 15, + 18, 76, 71, 74, 67, 18, 70, 72, 64, 
7, 17, 82, 73, 10, 13, 85, + 85, 84, 76, 71, 4, 71, 66, 68, 64, 6, 75, 70, 69, 1, 81, 73, + 80, 82, 1, 99, 76, 1, 70, 14, 70, 65, 23, 68, 71, 65, 19, 80, + 75, 97, 23, 31, 31, 22, 9, 22, 18, 13, 13, 10, 9, 5, 76, 79, + 86, 72, 69, 79, 98, 79, 78, 82, 80, 79, 72, 72, 87, 89, 81, 15, + 14, 15, 8, 69, 3, 0, 77, 69, 75, 76, 89, 89, 88, 88, 64, 3, + 80, 64, 65, 75, 81, 82, 83, 85, 85, 90, 101, 98, 104, 106, 109, 92, + 102, 107, 80, 65, 72, 4, 6, 7, 53, 17, 16, 68, 26, 62, 49, 62, + 15, 23, 16, 77, 99, 104, 110, 126, 126, 125, 68, 43, 27, 18, 9, 13, + 2, 66, 68, 71, 73, 66, 8, 1, 17, 14, 5, 10, 14, 17, 18, 9, + 20, 17, 0, 25, 19, 7, 0, 70, 76, 100, 103, 99, 105, 89, 73, 62, + 62, 62, 62}, + + { + + 31, 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 87, 7, 14, 52, 14, + 33, 68, 16, 25, 64, 77, 0, 90, 106, 64, 68, 126, 126, 125, 46, 7, + 69, 16, 25, 64, 71, 9, 17, 66, 2, 66, 69, 69, 83, 74, 92, 5, + 68, 70, 65, 78, 73, 88, 13, 1, 64, 68, 8, 3, 22, 0, 0, 0, + 2, 95, 97, 8, 71, 69, 52, 6, 69, 86, 71, 82, 6, 6, 1, 10, + 70, 11, 1, 64, 8, 82, 86, 84, 88, 29, 0, 15, 73, 1, 74, 66, + 8, 71, 82, 74, 79, 17, 70, 4, 72, 76, 3, 69, 4, 10, 5, 8, + 18, 15, 80, 67, 2, 66, 2, 73, 11, 68, 5, 19, 64, 16, 17, 8, + 90, 10, 66, 85, 71, 95, 77, 64, 70, 3, 65, 64, 17, 1, 66, 7, + 15, 72, 72, 1, 101, 83, 64, 95, 65, 76, 15, 0, 81, 25, 1, 65, + 86, 27, 78, 92, 1, 100, 6, 19, 19, 15, 19, 14, 3, 17, 20, 80, + 65, 9, 3, 1, 77, 66, 71, 74, 68, 74, 83, 69, 72, 105, 84, 78, + 91, 86, 89, 21, 28, 19, 17, 17, 8, 3, 8, 67, 73, 67, 80, 91, + 91, 114, 65, 67, 83, 71, 79, 82, 87, 90, 95, 99, 99, 114, 81, 101, + 109, 96, 101, 0, 36, 25, 17, 6, 5, 65, 67, 71, 69, 8, 48, 32, + 24, 17, 37, 14, 9, 1, 1, 0, 44, 27, 16, 8, 19, 6, 0, 66, + 69, 70, 27, 11, 5, 64, 8, 65, 70, 71, 4, 42, 25, 15, 9, 23, + 8, 6, 4, 0, 62, 78, 69, 3, 66, 68, 1, 8, 9, 0, 9, 15, + 19, 76, 71, 75, 67, 18, 71, 73, 65, 6, 17, 83, 74, 10, 13, 85, + 86, 85, 75, 70, 5, 71, 66, 68, 0, 6, 75, 69, 68, 2, 81, 73, + 80, 82, 2, 99, 76, 1, 70, 15, 70, 65, 24, 69, 
71, 65, 19, 81, + 75, 98, 22, 30, 31, 21, 8, 22, 18, 12, 12, 10, 8, 4, 77, 80, + 87, 74, 70, 81, 100, 81, 80, 83, 80, 79, 72, 72, 86, 90, 80, 13, + 12, 13, 6, 72, 1, 65, 80, 71, 78, 78, 92, 91, 89, 88, 65, 2, + 82, 66, 68, 77, 83, 84, 85, 87, 88, 92, 103, 100, 106, 107, 111, 94, + 104, 109, 79, 64, 71, 5, 7, 8, 54, 18, 17, 68, 27, 62, 50, 62, + 16, 22, 15, 80, 102, 107, 112, 126, 126, 126, 67, 43, 27, 18, 9, 13, + 2, 66, 68, 70, 72, 66, 9, 2, 18, 14, 6, 11, 14, 17, 18, 9, + 21, 17, 0, 24, 17, 5, 65, 71, 77, 103, 104, 100, 104, 87, 72, 62, + 62, 62, 62}, + + { + + 30, 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 89, 5, 12, 52, 14, + 36, 69, 17, 25, 64, 78, 0, 91, 107, 64, 70, 126, 126, 125, 49, 8, + 69, 17, 25, 64, 71, 10, 17, 66, 2, 66, 69, 69, 84, 73, 92, 5, + 68, 69, 66, 78, 73, 88, 14, 2, 64, 67, 9, 3, 22, 0, 0, 0, + 2, 95, 97, 9, 72, 69, 52, 6, 69, 85, 70, 82, 7, 8, 2, 12, + 68, 12, 2, 1, 10, 81, 85, 83, 88, 30, 0, 16, 72, 1, 73, 65, + 10, 71, 82, 73, 78, 17, 69, 5, 71, 75, 3, 69, 5, 10, 5, 9, + 19, 16, 80, 68, 2, 66, 2, 73, 11, 69, 4, 19, 64, 16, 17, 7, + 92, 10, 67, 87, 72, 96, 76, 0, 69, 4, 64, 0, 18, 2, 65, 8, + 16, 72, 72, 2, 102, 82, 0, 95, 65, 76, 15, 0, 82, 26, 1, 65, + 88, 28, 78, 93, 1, 101, 5, 18, 19, 14, 18, 13, 2, 16, 20, 81, + 66, 9, 3, 1, 78, 67, 71, 75, 68, 75, 84, 70, 73, 107, 85, 79, + 92, 87, 89, 19, 27, 17, 15, 15, 6, 0, 6, 69, 75, 69, 82, 93, + 93, 116, 67, 68, 85, 74, 81, 84, 89, 93, 97, 102, 101, 117, 82, 102, + 111, 97, 101, 0, 36, 26, 17, 6, 5, 65, 67, 71, 69, 9, 49, 32, + 24, 17, 38, 15, 10, 1, 2, 1, 45, 28, 16, 8, 20, 7, 1, 66, + 68, 70, 28, 11, 5, 64, 9, 64, 70, 70, 5, 43, 25, 15, 9, 24, + 9, 7, 5, 1, 62, 78, 69, 4, 66, 68, 1, 8, 9, 1, 9, 15, + 19, 76, 71, 75, 68, 19, 72, 74, 66, 5, 17, 84, 75, 9, 13, 86, + 87, 85, 74, 70, 6, 70, 65, 67, 0, 7, 75, 69, 68, 2, 82, 73, + 80, 81, 3, 99, 76, 2, 70, 15, 70, 65, 25, 69, 71, 65, 20, 81, + 76, 99, 21, 30, 30, 20, 7, 21, 17, 11, 11, 9, 7, 3, 78, 81, + 88, 75, 72, 82, 102, 82, 81, 84, 81, 80, 72, 72, 
86, 90, 79, 11, + 10, 11, 3, 75, 64, 67, 82, 73, 80, 80, 94, 93, 91, 89, 67, 0, + 83, 68, 70, 79, 85, 86, 87, 89, 90, 94, 106, 102, 108, 109, 112, 96, + 106, 110, 78, 0, 70, 5, 8, 9, 56, 19, 18, 68, 28, 62, 52, 62, + 16, 20, 13, 82, 105, 110, 115, 126, 126, 126, 67, 43, 27, 18, 9, 13, + 2, 66, 68, 70, 71, 65, 10, 3, 19, 15, 6, 12, 15, 18, 19, 10, + 21, 18, 0, 22, 16, 3, 67, 73, 79, 105, 106, 101, 103, 86, 72, 62, + 62, 62, 62}, + + { + + 28, 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 90, 4, 11, 52, 14, + 38, 69, 18, 26, 64, 79, 0, 92, 109, 65, 72, 126, 126, 126, 51, 9, + 69, 18, 26, 64, 71, 11, 17, 67, 2, 66, 68, 70, 84, 73, 93, 5, + 68, 69, 66, 79, 73, 88, 14, 2, 64, 67, 9, 3, 22, 0, 0, 0, + 3, 96, 97, 9, 73, 69, 52, 6, 69, 84, 69, 82, 7, 9, 4, 14, + 67, 13, 4, 2, 11, 80, 85, 82, 87, 30, 0, 17, 71, 1, 73, 65, + 11, 71, 82, 73, 77, 17, 69, 5, 70, 75, 4, 69, 5, 10, 5, 9, + 19, 16, 81, 68, 2, 66, 2, 73, 11, 69, 3, 19, 65, 16, 17, 7, + 94, 10, 67, 89, 73, 97, 75, 1, 68, 5, 64, 0, 19, 2, 65, 9, + 17, 72, 72, 2, 104, 82, 1, 95, 65, 77, 16, 0, 83, 27, 1, 65, + 89, 29, 79, 94, 1, 102, 3, 17, 18, 13, 17, 12, 1, 15, 19, 83, + 67, 8, 3, 1, 78, 68, 72, 76, 69, 76, 85, 71, 74, 109, 87, 80, + 93, 88, 89, 17, 25, 15, 13, 13, 4, 65, 4, 72, 78, 71, 85, 96, + 96, 119, 69, 70, 87, 76, 83, 87, 92, 95, 100, 105, 103, 119, 83, 104, + 112, 98, 102, 0, 36, 26, 17, 6, 5, 65, 67, 71, 68, 9, 49, 32, + 24, 17, 39, 15, 10, 1, 2, 1, 46, 28, 16, 8, 20, 7, 1, 65, + 67, 69, 29, 12, 5, 64, 10, 0, 69, 69, 5, 43, 25, 15, 9, 25, + 9, 7, 6, 1, 62, 78, 69, 4, 66, 68, 1, 8, 9, 1, 9, 15, + 20, 76, 71, 75, 69, 19, 73, 75, 67, 4, 17, 86, 77, 9, 13, 87, + 88, 86, 74, 69, 7, 70, 65, 67, 1, 8, 76, 69, 68, 2, 83, 73, + 80, 81, 3, 100, 76, 2, 70, 16, 70, 65, 25, 69, 72, 65, 21, 82, + 76, 100, 20, 29, 30, 19, 5, 20, 16, 10, 10, 8, 6, 2, 79, 82, + 89, 77, 73, 84, 104, 84, 82, 85, 82, 80, 72, 72, 86, 91, 79, 8, + 7, 9, 1, 78, 67, 70, 85, 75, 82, 82, 97, 95, 92, 90, 69, 65, + 85, 70, 72, 82, 88, 89, 90, 91, 92, 97, 108, 
104, 111, 111, 113, 98, + 108, 112, 77, 1, 69, 6, 9, 9, 57, 20, 18, 68, 28, 62, 53, 62, + 17, 19, 11, 85, 108, 113, 118, 126, 126, 126, 67, 43, 27, 18, 9, 13, + 2, 66, 68, 69, 71, 64, 11, 3, 20, 15, 7, 12, 15, 18, 19, 10, + 22, 18, 64, 21, 14, 1, 69, 75, 81, 107, 108, 102, 103, 85, 71, 62, + 62, 62, 62}, + + { + + 27, 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 91, 3, 10, 52, 14, + 40, 69, 19, 27, 64, 79, 1, 93, 110, 66, 74, 126, 126, 126, 54, 11, + 69, 19, 27, 64, 70, 12, 18, 68, 2, 65, 67, 70, 84, 73, 93, 5, + 68, 68, 66, 79, 73, 88, 14, 2, 0, 67, 9, 3, 22, 0, 0, 0, + 4, 96, 97, 9, 74, 69, 52, 6, 69, 83, 68, 82, 7, 10, 6, 16, + 65, 15, 6, 3, 13, 79, 84, 81, 86, 31, 1, 18, 70, 1, 72, 64, + 13, 71, 81, 73, 75, 18, 69, 5, 68, 75, 5, 69, 5, 11, 6, 10, + 19, 16, 81, 68, 3, 66, 2, 72, 11, 69, 3, 19, 66, 16, 17, 7, + 96, 10, 67, 90, 74, 97, 74, 2, 67, 6, 0, 1, 20, 3, 65, 10, + 18, 72, 72, 2, 105, 81, 2, 95, 64, 77, 17, 0, 84, 28, 1, 65, + 90, 30, 80, 95, 1, 102, 2, 16, 18, 12, 16, 11, 1, 15, 19, 84, + 68, 8, 3, 1, 78, 68, 73, 77, 70, 77, 86, 71, 74, 110, 88, 80, + 94, 89, 89, 16, 24, 14, 12, 12, 2, 67, 2, 74, 80, 73, 87, 99, + 98, 122, 70, 72, 88, 78, 85, 89, 94, 97, 102, 107, 105, 121, 83, 106, + 113, 98, 102, 0, 37, 26, 17, 7, 5, 65, 66, 71, 67, 9, 49, 32, + 25, 17, 40, 16, 11, 2, 3, 2, 47, 29, 17, 9, 21, 8, 1, 64, + 66, 68, 31, 13, 6, 0, 11, 1, 68, 68, 6, 43, 26, 16, 9, 26, + 10, 8, 7, 2, 62, 77, 68, 5, 66, 68, 1, 9, 10, 1, 9, 16, + 21, 76, 71, 75, 69, 19, 74, 75, 68, 4, 17, 87, 78, 9, 13, 87, + 89, 87, 73, 68, 8, 70, 64, 67, 2, 9, 76, 69, 68, 3, 83, 73, + 80, 81, 4, 100, 76, 2, 70, 17, 70, 65, 26, 69, 72, 65, 22, 83, + 76, 100, 19, 29, 30, 19, 4, 19, 15, 9, 10, 7, 5, 2, 79, 83, + 90, 78, 74, 86, 106, 85, 83, 85, 82, 80, 71, 71, 86, 92, 78, 6, + 5, 7, 64, 80, 69, 72, 87, 77, 84, 84, 99, 97, 93, 91, 71, 66, + 86, 72, 74, 84, 90, 91, 92, 93, 94, 99, 110, 105, 113, 113, 114, 100, + 110, 114, 76, 3, 67, 7, 10, 10, 59, 21, 19, 68, 29, 62, 54, 62, + 18, 18, 10, 87, 111, 115, 121, 
126, 126, 126, 67, 43, 27, 18, 9, 14, + 2, 66, 68, 68, 70, 0, 12, 4, 22, 16, 8, 13, 16, 19, 20, 10, + 23, 18, 64, 20, 13, 0, 70, 77, 83, 109, 110, 102, 102, 83, 70, 62, + 62, 62, 62}, + + { + + 26, 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 93, 1, 8, 52, 14, + 43, 70, 20, 27, 64, 80, 1, 94, 111, 66, 76, 126, 126, 126, 57, 12, + 69, 20, 27, 64, 70, 13, 18, 68, 2, 65, 67, 70, 85, 72, 93, 5, + 68, 67, 67, 79, 73, 88, 15, 3, 0, 66, 10, 3, 22, 0, 0, 0, + 4, 96, 97, 10, 75, 69, 52, 6, 69, 82, 67, 82, 8, 12, 7, 18, + 64, 16, 7, 5, 15, 78, 83, 80, 86, 32, 1, 19, 69, 1, 72, 0, + 15, 71, 81, 72, 74, 18, 68, 6, 67, 74, 5, 69, 6, 11, 6, 11, + 20, 17, 82, 69, 3, 66, 2, 72, 11, 70, 2, 19, 66, 16, 17, 6, + 98, 10, 68, 92, 75, 98, 73, 3, 66, 7, 1, 2, 21, 4, 64, 11, + 19, 72, 72, 3, 106, 81, 3, 95, 64, 77, 17, 0, 85, 29, 1, 65, + 92, 31, 80, 96, 1, 103, 0, 15, 17, 11, 15, 10, 0, 14, 19, 85, + 69, 8, 3, 1, 79, 69, 73, 78, 70, 78, 87, 72, 75, 112, 89, 81, + 95, 90, 89, 14, 23, 12, 10, 10, 0, 70, 0, 77, 82, 75, 90, 101, + 100, 124, 72, 73, 90, 81, 87, 91, 96, 100, 104, 110, 107, 124, 84, 107, + 115, 99, 102, 0, 37, 27, 17, 7, 5, 65, 66, 71, 67, 10, 50, 32, + 25, 17, 41, 16, 12, 2, 4, 3, 48, 30, 17, 9, 22, 8, 2, 64, + 65, 68, 32, 13, 6, 0, 12, 2, 68, 67, 6, 44, 26, 16, 9, 27, + 11, 9, 8, 3, 62, 77, 68, 5, 66, 68, 1, 9, 10, 2, 9, 16, + 21, 76, 71, 75, 70, 20, 75, 76, 69, 3, 17, 88, 79, 8, 13, 88, + 90, 87, 72, 68, 9, 69, 64, 66, 2, 10, 76, 69, 68, 3, 84, 73, + 80, 80, 5, 100, 76, 3, 70, 17, 70, 65, 27, 69, 72, 65, 23, 83, + 77, 101, 18, 29, 29, 18, 3, 18, 14, 8, 9, 6, 4, 1, 80, 84, + 91, 80, 76, 87, 108, 86, 84, 86, 83, 81, 71, 71, 86, 92, 77, 4, + 3, 5, 67, 83, 71, 74, 90, 79, 86, 86, 101, 99, 95, 92, 73, 68, + 87, 74, 76, 86, 92, 93, 94, 95, 96, 101, 113, 107, 115, 115, 115, 102, + 112, 115, 75, 4, 66, 7, 11, 11, 61, 22, 20, 68, 30, 62, 56, 62, + 18, 16, 8, 90, 114, 118, 124, 126, 126, 126, 67, 43, 27, 18, 9, 14, + 2, 66, 68, 68, 69, 1, 13, 5, 23, 17, 8, 14, 17, 20, 21, 11, + 23, 19, 64, 
18, 11, 65, 72, 79, 85, 111, 112, 103, 101, 82, 70, 62, + 62, 62, 62}, + + { + + 25, 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 94, 0, 7, 52, 14, + 45, 70, 20, 28, 64, 81, 1, 95, 112, 67, 77, 126, 126, 126, 60, 13, + 69, 20, 28, 64, 69, 14, 19, 69, 3, 64, 66, 71, 85, 72, 93, 5, + 67, 67, 67, 79, 72, 88, 15, 3, 0, 66, 10, 3, 22, 0, 0, 0, + 5, 96, 97, 10, 75, 70, 52, 6, 69, 81, 67, 82, 8, 13, 9, 20, + 1, 17, 9, 6, 17, 77, 82, 79, 85, 33, 1, 20, 69, 2, 71, 0, + 17, 71, 80, 72, 73, 18, 68, 6, 65, 74, 6, 68, 6, 11, 6, 12, + 20, 17, 82, 69, 3, 66, 2, 72, 11, 70, 2, 19, 67, 16, 17, 6, + 100, 10, 68, 94, 76, 99, 73, 4, 66, 7, 2, 2, 22, 4, 64, 11, + 21, 72, 72, 3, 107, 80, 4, 95, 64, 77, 18, 1, 86, 29, 1, 65, + 93, 32, 81, 97, 1, 104, 64, 14, 17, 11, 14, 10, 0, 13, 18, 86, + 70, 8, 3, 1, 79, 70, 74, 79, 71, 78, 88, 73, 75, 114, 90, 81, + 96, 91, 89, 13, 21, 11, 8, 8, 65, 72, 65, 79, 85, 77, 92, 104, + 103, 126, 74, 75, 91, 83, 90, 94, 99, 102, 107, 112, 110, 126, 85, 109, + 116, 100, 102, 1, 38, 27, 18, 7, 5, 65, 66, 71, 66, 10, 50, 33, + 25, 17, 42, 17, 12, 3, 4, 4, 49, 30, 18, 9, 23, 9, 2, 0, + 64, 67, 33, 14, 7, 0, 12, 2, 67, 65, 7, 44, 26, 16, 10, 27, + 11, 10, 8, 4, 62, 76, 67, 6, 66, 68, 1, 9, 10, 2, 10, 16, + 22, 76, 71, 76, 70, 20, 76, 77, 70, 2, 17, 89, 80, 8, 13, 88, + 91, 88, 71, 67, 10, 69, 0, 66, 3, 10, 76, 68, 67, 4, 84, 73, + 80, 80, 6, 100, 76, 3, 70, 18, 70, 65, 28, 70, 72, 65, 23, 84, + 77, 102, 17, 28, 29, 17, 2, 18, 14, 7, 8, 6, 3, 0, 81, 85, + 92, 81, 77, 89, 110, 88, 86, 87, 83, 81, 71, 71, 85, 93, 76, 2, + 1, 3, 69, 86, 73, 76, 92, 81, 89, 88, 104, 101, 96, 92, 74, 69, + 89, 76, 79, 88, 94, 95, 96, 97, 99, 103, 115, 109, 117, 116, 117, 104, + 114, 117, 74, 5, 65, 8, 12, 12, 62, 23, 21, 68, 31, 62, 57, 62, + 19, 15, 7, 92, 117, 121, 126, 126, 126, 126, 66, 43, 27, 18, 9, 14, + 2, 66, 68, 67, 68, 1, 14, 6, 24, 17, 9, 15, 17, 20, 21, 11, + 24, 19, 64, 17, 10, 67, 74, 80, 86, 114, 113, 104, 100, 80, 69, 62, + 62, 62, 62}, + + { + + 23, 4, 82, 23, 4, 82, 13, 
23, 27, 12, 70, 96, 65, 6, 52, 14, + 47, 70, 21, 29, 64, 82, 1, 96, 113, 67, 79, 126, 126, 126, 62, 14, + 69, 21, 29, 64, 69, 15, 19, 69, 3, 64, 65, 71, 86, 72, 93, 5, + 67, 66, 67, 80, 72, 88, 16, 3, 0, 66, 11, 3, 22, 0, 0, 0, + 5, 97, 97, 11, 76, 70, 52, 6, 69, 80, 66, 82, 9, 14, 11, 22, + 2, 18, 11, 7, 19, 76, 82, 78, 84, 33, 1, 21, 68, 2, 71, 1, + 19, 71, 80, 71, 72, 18, 68, 7, 64, 73, 7, 68, 6, 11, 6, 12, + 21, 18, 83, 69, 3, 66, 2, 72, 11, 71, 1, 19, 68, 16, 17, 6, + 102, 10, 69, 96, 77, 100, 72, 5, 65, 8, 2, 3, 23, 5, 0, 12, + 22, 72, 72, 3, 108, 80, 5, 95, 64, 77, 19, 1, 87, 30, 1, 65, + 94, 33, 81, 98, 1, 105, 66, 13, 16, 10, 13, 9, 64, 12, 18, 88, + 71, 7, 3, 1, 80, 71, 75, 80, 72, 79, 89, 74, 76, 116, 91, 82, + 97, 92, 89, 11, 20, 9, 6, 6, 67, 74, 67, 82, 87, 79, 95, 106, + 105, 126, 76, 77, 93, 85, 92, 96, 101, 104, 109, 115, 112, 126, 86, 111, + 117, 101, 102, 1, 38, 27, 18, 7, 5, 65, 66, 71, 66, 10, 50, 33, + 25, 17, 43, 17, 13, 3, 5, 4, 50, 31, 18, 9, 23, 9, 3, 0, + 0, 66, 34, 15, 7, 0, 13, 3, 66, 64, 7, 44, 26, 16, 10, 28, + 12, 10, 9, 5, 62, 76, 67, 6, 66, 68, 1, 9, 10, 2, 10, 16, + 22, 76, 71, 76, 71, 21, 77, 78, 71, 1, 17, 91, 81, 8, 13, 89, + 92, 88, 71, 66, 11, 68, 0, 65, 3, 11, 77, 68, 67, 4, 85, 73, + 80, 79, 7, 100, 76, 3, 70, 19, 70, 65, 29, 70, 73, 65, 24, 85, + 77, 103, 16, 28, 28, 16, 1, 17, 13, 6, 7, 5, 2, 64, 82, 86, + 93, 83, 78, 90, 112, 89, 87, 88, 84, 82, 71, 71, 85, 93, 76, 64, + 65, 1, 71, 89, 75, 78, 95, 83, 91, 90, 106, 103, 98, 93, 76, 71, + 90, 78, 81, 90, 96, 98, 98, 99, 101, 105, 118, 111, 119, 118, 118, 106, + 116, 118, 73, 6, 64, 9, 13, 13, 62, 24, 22, 68, 32, 62, 59, 62, + 20, 13, 5, 95, 120, 124, 126, 126, 126, 126, 66, 43, 27, 18, 9, 14, + 2, 66, 68, 67, 67, 2, 15, 6, 25, 18, 9, 16, 18, 21, 22, 11, + 24, 19, 64, 16, 8, 69, 76, 82, 88, 116, 115, 105, 99, 79, 68, 62, + 62, 62, 62}, + + { + + 22, 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 97, 66, 4, 52, 14, + 50, 71, 22, 29, 64, 82, 2, 97, 114, 68, 81, 126, 126, 126, 62, 16, + 
69, 22, 29, 64, 69, 16, 19, 70, 3, 64, 65, 71, 86, 71, 93, 5, + 67, 65, 68, 80, 72, 88, 16, 4, 1, 65, 11, 3, 22, 0, 0, 0, + 6, 97, 97, 11, 77, 70, 52, 6, 69, 79, 65, 82, 9, 16, 12, 24, + 4, 19, 12, 9, 21, 75, 81, 77, 84, 34, 1, 22, 67, 2, 70, 2, + 21, 71, 80, 71, 70, 19, 67, 7, 0, 73, 7, 68, 7, 12, 7, 13, + 21, 18, 83, 70, 3, 66, 2, 72, 11, 71, 0, 19, 68, 16, 17, 5, + 104, 10, 69, 97, 78, 101, 71, 6, 64, 9, 3, 4, 24, 6, 0, 13, + 23, 72, 72, 4, 109, 79, 6, 95, 0, 77, 19, 1, 88, 31, 1, 65, + 96, 34, 82, 99, 1, 105, 67, 12, 16, 9, 12, 8, 65, 12, 18, 89, + 72, 7, 3, 1, 80, 71, 75, 81, 72, 80, 90, 74, 77, 118, 92, 83, + 98, 93, 89, 9, 19, 7, 4, 5, 69, 77, 69, 84, 89, 81, 97, 109, + 107, 126, 78, 78, 95, 88, 94, 98, 103, 107, 111, 118, 114, 126, 87, 112, + 119, 101, 102, 1, 38, 28, 18, 7, 5, 65, 66, 71, 65, 11, 51, 33, + 25, 17, 44, 18, 14, 3, 6, 5, 51, 32, 18, 10, 24, 10, 3, 1, + 1, 66, 36, 15, 7, 1, 14, 4, 66, 0, 8, 45, 27, 16, 10, 29, + 13, 11, 10, 6, 62, 76, 67, 7, 66, 68, 1, 9, 10, 3, 10, 17, + 23, 76, 71, 76, 72, 21, 78, 79, 72, 1, 17, 92, 82, 7, 13, 90, + 93, 89, 70, 66, 12, 68, 1, 65, 4, 12, 77, 68, 67, 4, 86, 73, + 80, 79, 8, 100, 76, 4, 70, 19, 70, 65, 30, 70, 73, 65, 25, 85, + 78, 103, 15, 28, 28, 16, 0, 16, 12, 5, 7, 4, 1, 65, 83, 87, + 94, 84, 80, 92, 114, 90, 88, 89, 85, 82, 71, 70, 85, 94, 75, 66, + 67, 64, 74, 91, 77, 80, 97, 85, 93, 92, 108, 105, 99, 94, 78, 73, + 91, 80, 83, 92, 98, 100, 100, 101, 103, 107, 120, 113, 121, 120, 119, 108, + 118, 120, 72, 8, 0, 9, 14, 14, 62, 25, 23, 68, 33, 62, 60, 62, + 20, 12, 3, 97, 123, 126, 126, 126, 126, 126, 66, 43, 27, 18, 9, 14, + 2, 66, 68, 66, 66, 3, 16, 7, 26, 19, 10, 17, 19, 22, 23, 12, + 25, 20, 64, 14, 7, 70, 77, 84, 90, 118, 117, 106, 98, 78, 68, 62, + 62, 62, 62}, + + { + + 21, 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 99, 68, 3, 52, 14, + 52, 71, 23, 30, 64, 83, 2, 98, 115, 68, 83, 126, 126, 126, 62, 17, + 69, 23, 30, 64, 68, 17, 20, 70, 3, 0, 64, 72, 87, 71, 93, 5, + 67, 65, 68, 80, 72, 88, 17, 4, 1, 65, 
12, 3, 22, 0, 0, 0, + 6, 97, 97, 12, 78, 70, 52, 6, 69, 78, 64, 82, 10, 17, 14, 26, + 5, 20, 14, 10, 23, 74, 80, 76, 83, 35, 1, 23, 66, 2, 70, 2, + 23, 71, 79, 70, 69, 19, 67, 8, 2, 72, 8, 68, 7, 12, 7, 14, + 22, 19, 84, 70, 3, 66, 2, 72, 11, 72, 0, 19, 69, 16, 17, 5, + 106, 10, 70, 99, 79, 102, 70, 7, 0, 10, 4, 4, 25, 6, 1, 14, + 24, 72, 72, 4, 110, 79, 7, 95, 0, 77, 20, 1, 89, 32, 1, 65, + 97, 35, 82, 100, 1, 106, 69, 11, 15, 8, 11, 7, 65, 11, 17, 90, + 73, 7, 3, 1, 81, 72, 76, 82, 73, 81, 91, 75, 77, 120, 93, 83, + 99, 94, 89, 8, 17, 6, 2, 3, 71, 79, 71, 87, 92, 83, 100, 111, + 110, 126, 80, 80, 96, 90, 96, 101, 106, 109, 114, 120, 116, 126, 88, 114, + 120, 102, 102, 1, 39, 28, 18, 7, 5, 65, 66, 71, 65, 11, 51, 33, + 25, 17, 45, 18, 14, 4, 6, 6, 52, 32, 19, 10, 25, 10, 4, 1, + 2, 65, 37, 16, 8, 1, 15, 5, 65, 1, 8, 45, 27, 16, 10, 30, + 13, 12, 11, 7, 62, 75, 66, 7, 66, 68, 1, 9, 10, 3, 10, 17, + 23, 76, 71, 76, 72, 22, 79, 80, 73, 0, 17, 93, 83, 7, 13, 90, + 94, 89, 69, 65, 13, 67, 1, 64, 4, 13, 77, 68, 67, 5, 86, 73, + 80, 78, 9, 100, 76, 4, 70, 20, 70, 65, 31, 70, 73, 65, 26, 86, + 78, 104, 14, 27, 27, 15, 64, 15, 11, 4, 6, 3, 0, 66, 84, 88, + 95, 86, 81, 93, 116, 92, 89, 90, 85, 83, 71, 70, 85, 94, 74, 68, + 69, 66, 76, 94, 79, 82, 100, 87, 95, 94, 111, 107, 101, 95, 80, 74, + 93, 82, 85, 94, 100, 102, 102, 103, 105, 109, 123, 115, 123, 122, 120, 110, + 120, 121, 71, 9, 1, 10, 15, 15, 62, 26, 24, 68, 34, 62, 62, 62, + 21, 10, 2, 100, 126, 126, 126, 126, 126, 126, 66, 43, 27, 18, 9, 14, + 2, 66, 68, 66, 65, 4, 17, 8, 27, 19, 10, 18, 19, 22, 23, 12, + 25, 20, 64, 13, 5, 72, 79, 86, 92, 120, 119, 107, 97, 76, 67, 62, + 62, 62, 62}, + + { + + 20, 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 100, 69, 2, 52, 14, + 54, 71, 24, 31, 64, 84, 2, 99, 116, 69, 85, 126, 126, 126, 62, 18, + 69, 24, 31, 64, 68, 18, 20, 71, 3, 0, 0, 72, 87, 71, 93, 5, + 67, 64, 68, 80, 72, 88, 17, 4, 1, 65, 12, 3, 22, 0, 0, 0, + 7, 97, 97, 12, 79, 70, 52, 6, 69, 77, 0, 82, 10, 18, 16, 28, + 7, 21, 
16, 11, 25, 73, 79, 75, 82, 36, 1, 24, 65, 2, 69, 3, + 25, 71, 79, 70, 68, 19, 67, 8, 3, 72, 9, 68, 7, 12, 7, 15, + 22, 19, 84, 70, 3, 66, 2, 72, 11, 72, 64, 19, 70, 16, 17, 5, + 108, 10, 70, 101, 80, 103, 69, 8, 1, 11, 5, 5, 26, 7, 1, 15, + 25, 72, 72, 4, 111, 78, 8, 95, 0, 77, 21, 1, 90, 33, 1, 65, + 98, 36, 83, 101, 1, 107, 70, 10, 15, 7, 10, 6, 66, 10, 17, 91, + 74, 7, 3, 1, 81, 73, 77, 83, 74, 82, 92, 76, 78, 122, 94, 84, + 100, 95, 89, 6, 16, 4, 0, 1, 73, 81, 73, 89, 94, 85, 102, 114, + 112, 126, 82, 82, 98, 92, 98, 103, 108, 111, 116, 123, 118, 126, 89, 116, + 121, 103, 102, 1, 39, 28, 18, 7, 5, 65, 66, 71, 64, 11, 51, 33, + 25, 17, 46, 19, 15, 4, 7, 7, 53, 33, 19, 10, 26, 11, 4, 2, + 3, 64, 38, 17, 8, 1, 16, 6, 64, 2, 9, 45, 27, 16, 10, 31, + 14, 13, 12, 8, 62, 75, 66, 8, 66, 68, 1, 9, 10, 3, 10, 17, + 24, 76, 71, 76, 73, 22, 80, 81, 74, 64, 17, 94, 84, 7, 13, 91, + 95, 90, 68, 64, 14, 67, 2, 64, 5, 14, 77, 68, 67, 5, 87, 73, + 80, 78, 10, 100, 76, 4, 70, 21, 70, 65, 32, 70, 73, 65, 27, 87, + 78, 105, 13, 27, 27, 14, 65, 14, 10, 3, 5, 2, 64, 67, 85, 89, + 96, 87, 82, 95, 118, 93, 90, 91, 86, 83, 71, 70, 85, 95, 73, 70, + 71, 68, 78, 97, 81, 84, 102, 89, 97, 96, 113, 109, 102, 96, 82, 76, + 94, 84, 87, 96, 102, 104, 104, 105, 107, 111, 125, 117, 125, 124, 121, 112, + 122, 123, 70, 10, 2, 11, 16, 16, 62, 27, 25, 68, 35, 62, 62, 62, + 22, 9, 0, 102, 126, 126, 126, 126, 126, 126, 66, 43, 27, 18, 9, 14, + 2, 66, 68, 65, 64, 5, 18, 9, 28, 20, 11, 19, 20, 23, 24, 12, + 26, 20, 64, 12, 4, 74, 81, 88, 94, 122, 121, 108, 96, 75, 66, 62, + 62, 62, 62}, + + { + + 18, 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 102, 71, 0, 51, 14, + 56, 72, 24, 31, 65, 85, 2, 101, 118, 70, 87, 126, 126, 126, 62, 19, + 70, 24, 31, 65, 68, 19, 20, 72, 3, 0, 0, 73, 88, 71, 94, 5, + 67, 64, 69, 81, 72, 88, 17, 4, 1, 65, 12, 2, 22, 0, 0, 0, + 7, 98, 97, 12, 80, 71, 52, 5, 69, 76, 0, 82, 10, 19, 17, 29, + 8, 22, 17, 12, 26, 72, 79, 74, 82, 36, 1, 24, 65, 2, 69, 3, + 26, 71, 79, 70, 67, 19, 67, 8, 
4, 72, 9, 68, 7, 12, 7, 15, + 22, 19, 85, 71, 3, 67, 2, 72, 10, 73, 65, 19, 71, 15, 17, 4, + 110, 9, 71, 103, 81, 104, 69, 8, 1, 11, 5, 5, 27, 7, 1, 15, + 26, 73, 72, 4, 113, 78, 8, 95, 0, 78, 21, 1, 91, 33, 1, 65, + 100, 36, 84, 102, 1, 108, 72, 9, 14, 6, 9, 5, 67, 9, 16, 93, + 75, 6, 2, 1, 82, 74, 78, 84, 75, 83, 93, 77, 79, 124, 96, 85, + 102, 97, 89, 4, 14, 2, 65, 64, 76, 84, 76, 92, 97, 88, 105, 117, + 115, 126, 84, 84, 100, 95, 101, 106, 111, 114, 119, 126, 121, 126, 90, 118, + 123, 104, 103, 1, 39, 28, 18, 7, 5, 66, 66, 71, 64, 11, 51, 33, + 25, 17, 47, 19, 15, 4, 7, 7, 53, 33, 19, 10, 26, 11, 4, 2, + 4, 64, 39, 17, 8, 1, 16, 6, 64, 3, 9, 45, 27, 16, 10, 31, + 14, 13, 12, 8, 62, 75, 66, 8, 66, 68, 1, 9, 10, 3, 10, 17, + 24, 76, 71, 77, 74, 22, 81, 82, 75, 65, 16, 96, 86, 6, 12, 92, + 97, 91, 68, 64, 15, 67, 2, 64, 5, 14, 78, 68, 67, 5, 88, 73, + 80, 78, 10, 101, 76, 4, 70, 21, 71, 65, 32, 71, 74, 65, 27, 88, + 79, 106, 12, 26, 26, 13, 67, 13, 9, 2, 4, 1, 65, 68, 86, 91, + 98, 89, 84, 97, 120, 95, 92, 92, 87, 84, 71, 70, 85, 96, 73, 73, + 74, 70, 81, 100, 84, 87, 105, 92, 100, 99, 116, 112, 104, 97, 84, 78, + 96, 86, 90, 99, 105, 107, 107, 107, 110, 114, 126, 119, 126, 126, 123, 114, + 124, 125, 69, 11, 3, 11, 16, 16, 62, 27, 25, 68, 35, 62, 62, 62, + 22, 7, 65, 105, 126, 126, 126, 126, 126, 126, 66, 43, 26, 17, 9, 14, + 2, 67, 68, 65, 64, 5, 18, 9, 29, 20, 11, 19, 20, 23, 24, 12, + 26, 20, 65, 10, 2, 76, 83, 90, 96, 125, 123, 109, 96, 74, 66, 62, + 62, 62, 62}, + + { + + 17, 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 103, 72, 64, 51, 14, + 59, 72, 25, 32, 65, 85, 3, 102, 119, 70, 88, 126, 126, 126, 62, 21, + 70, 25, 32, 65, 67, 21, 21, 72, 4, 1, 1, 73, 88, 70, 94, 5, + 66, 0, 69, 81, 71, 88, 18, 5, 2, 64, 13, 2, 22, 0, 0, 0, + 8, 98, 97, 13, 80, 71, 52, 5, 69, 74, 1, 82, 11, 21, 19, 31, + 10, 24, 19, 14, 28, 70, 78, 73, 81, 37, 2, 25, 64, 3, 68, 4, + 28, 70, 78, 69, 65, 20, 66, 9, 6, 71, 10, 67, 8, 13, 8, 16, + 23, 20, 85, 71, 4, 67, 2, 71, 10, 73, 65, 19, 71, 
15, 17, 4, + 111, 9, 71, 104, 82, 104, 68, 9, 2, 12, 6, 6, 28, 8, 2, 16, + 28, 73, 72, 5, 114, 77, 9, 95, 1, 78, 22, 2, 91, 34, 1, 65, + 101, 37, 84, 103, 1, 108, 73, 9, 14, 6, 9, 5, 67, 9, 16, 94, + 75, 6, 2, 1, 82, 74, 78, 84, 75, 83, 94, 77, 79, 125, 97, 85, + 103, 98, 89, 3, 13, 1, 66, 65, 78, 86, 78, 94, 99, 90, 107, 119, + 117, 126, 85, 85, 101, 97, 103, 108, 113, 116, 121, 126, 123, 126, 90, 119, + 124, 104, 103, 2, 40, 29, 19, 8, 5, 66, 65, 70, 0, 12, 52, 34, + 26, 17, 48, 20, 16, 5, 8, 8, 54, 34, 20, 11, 27, 12, 5, 3, + 6, 0, 41, 18, 9, 2, 17, 7, 0, 5, 10, 46, 28, 17, 11, 32, + 15, 14, 13, 9, 62, 74, 65, 9, 66, 67, 1, 10, 11, 4, 11, 18, + 25, 75, 71, 77, 74, 23, 81, 82, 76, 65, 16, 97, 87, 6, 12, 92, + 98, 91, 67, 0, 16, 66, 3, 0, 6, 15, 78, 67, 66, 6, 88, 72, + 79, 77, 11, 101, 76, 5, 70, 22, 71, 65, 33, 71, 74, 64, 28, 88, + 79, 106, 12, 26, 26, 13, 68, 13, 9, 2, 4, 1, 65, 68, 86, 92, + 99, 90, 85, 98, 121, 96, 93, 92, 87, 84, 70, 69, 84, 96, 72, 75, + 76, 72, 83, 102, 86, 89, 107, 94, 102, 101, 118, 114, 105, 97, 85, 79, + 97, 87, 92, 101, 107, 109, 109, 109, 112, 116, 126, 120, 126, 126, 124, 115, + 125, 126, 67, 13, 5, 12, 17, 17, 62, 28, 26, 68, 36, 62, 62, 62, + 23, 6, 66, 107, 126, 126, 126, 126, 126, 126, 65, 44, 26, 17, 9, 15, + 2, 67, 68, 64, 0, 6, 19, 10, 31, 21, 12, 20, 21, 24, 25, 13, + 27, 21, 65, 9, 1, 77, 84, 91, 97, 126, 124, 109, 95, 72, 65, 62, + 62, 62, 62}, + + { + + 16, 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 104, 73, 65, 51, 14, + 61, 72, 26, 33, 65, 86, 3, 103, 120, 71, 90, 126, 126, 126, 62, 22, + 70, 26, 33, 65, 67, 22, 21, 73, 4, 1, 2, 73, 88, 70, 94, 5, + 66, 1, 69, 81, 71, 88, 18, 5, 2, 64, 13, 2, 22, 0, 0, 0, + 9, 98, 97, 13, 81, 71, 52, 5, 69, 73, 2, 82, 11, 22, 21, 33, + 11, 25, 21, 15, 30, 69, 77, 72, 80, 38, 2, 26, 0, 3, 68, 5, + 30, 70, 78, 69, 64, 20, 66, 9, 7, 71, 11, 67, 8, 13, 8, 17, + 23, 20, 86, 71, 4, 67, 2, 71, 10, 73, 66, 19, 72, 15, 17, 4, + 113, 9, 71, 106, 83, 105, 67, 10, 3, 13, 7, 7, 29, 9, 2, 17, + 29, 73, 
72, 5, 115, 77, 10, 95, 1, 78, 23, 2, 92, 35, 1, 65, + 102, 38, 85, 104, 1, 109, 75, 8, 13, 5, 8, 4, 68, 8, 16, 95, + 76, 6, 2, 1, 82, 75, 79, 85, 76, 84, 95, 78, 80, 126, 98, 86, + 104, 99, 89, 1, 12, 64, 68, 67, 80, 88, 80, 97, 101, 92, 110, 122, + 119, 126, 87, 87, 103, 99, 105, 110, 115, 118, 123, 126, 125, 126, 91, 121, + 125, 105, 103, 2, 40, 29, 19, 8, 5, 66, 65, 70, 1, 12, 52, 34, + 26, 17, 49, 20, 17, 5, 9, 9, 55, 35, 20, 11, 28, 12, 5, 4, + 7, 1, 42, 19, 9, 2, 18, 8, 1, 6, 10, 46, 28, 17, 11, 33, + 16, 15, 14, 10, 62, 74, 65, 9, 66, 67, 1, 10, 11, 4, 11, 18, + 26, 75, 71, 77, 75, 23, 82, 83, 77, 66, 16, 98, 88, 6, 12, 93, + 99, 92, 66, 1, 17, 66, 3, 0, 7, 16, 78, 67, 66, 6, 89, 72, + 79, 77, 12, 101, 76, 5, 70, 23, 71, 65, 34, 71, 74, 64, 29, 89, + 79, 107, 11, 26, 26, 12, 69, 12, 8, 1, 3, 0, 66, 69, 87, 93, + 100, 92, 86, 100, 123, 97, 94, 93, 88, 84, 70, 69, 84, 97, 71, 77, + 78, 74, 85, 105, 88, 91, 110, 96, 104, 103, 120, 116, 106, 98, 87, 81, + 98, 89, 94, 103, 109, 111, 111, 111, 114, 118, 126, 122, 126, 126, 125, 117, + 126, 126, 66, 14, 6, 13, 18, 18, 62, 29, 27, 68, 37, 62, 62, 62, + 24, 5, 68, 110, 126, 126, 126, 126, 126, 126, 65, 44, 26, 17, 9, 15, + 2, 67, 68, 0, 1, 7, 20, 11, 32, 22, 13, 21, 22, 25, 26, 13, + 28, 21, 65, 8, 64, 79, 86, 93, 99, 126, 126, 110, 94, 71, 64, 62, + 62, 62, 62}, + + { + + 15, 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 106, 75, 66, 51, 14, + 62, 72, 27, 34, 65, 87, 3, 104, 121, 71, 92, 126, 126, 126, 62, 23, + 70, 27, 34, 65, 66, 23, 22, 73, 4, 2, 3, 74, 89, 70, 94, 5, + 66, 1, 69, 81, 71, 88, 19, 5, 2, 64, 14, 2, 22, 0, 0, 0, + 9, 98, 97, 14, 82, 71, 52, 5, 69, 72, 3, 82, 12, 23, 23, 35, + 13, 26, 23, 16, 32, 68, 76, 71, 79, 39, 2, 27, 1, 3, 67, 5, + 32, 70, 77, 68, 0, 20, 66, 10, 9, 70, 12, 67, 8, 13, 8, 18, + 24, 21, 86, 71, 4, 67, 2, 71, 10, 74, 66, 19, 73, 15, 17, 4, + 115, 9, 72, 108, 84, 106, 66, 11, 4, 14, 8, 7, 30, 9, 3, 18, + 30, 73, 72, 5, 116, 76, 11, 95, 1, 78, 24, 2, 93, 36, 1, 65, + 103, 39, 85, 105, 1, 
110, 76, 7, 13, 4, 7, 3, 68, 7, 15, 96, + 77, 6, 2, 1, 83, 76, 80, 86, 77, 85, 96, 79, 80, 126, 99, 86, + 105, 100, 89, 0, 10, 65, 70, 69, 82, 90, 82, 99, 104, 94, 112, 124, + 122, 126, 89, 89, 104, 101, 107, 113, 118, 120, 126, 126, 126, 126, 92, 123, + 126, 106, 103, 2, 41, 29, 19, 8, 5, 66, 65, 70, 1, 12, 52, 34, + 26, 17, 50, 21, 17, 6, 9, 10, 56, 35, 21, 11, 29, 13, 6, 4, + 8, 2, 43, 20, 10, 2, 19, 9, 2, 7, 11, 46, 28, 17, 11, 34, + 16, 16, 15, 11, 62, 73, 64, 10, 66, 67, 1, 10, 11, 4, 11, 18, + 26, 75, 71, 77, 75, 24, 83, 84, 78, 67, 16, 99, 89, 6, 12, 93, + 100, 92, 65, 2, 18, 65, 4, 1, 7, 17, 78, 67, 66, 7, 89, 72, + 79, 76, 13, 101, 76, 5, 70, 24, 71, 65, 35, 71, 74, 64, 30, 90, + 79, 108, 10, 25, 25, 11, 70, 11, 7, 0, 2, 64, 67, 70, 88, 94, + 101, 93, 87, 101, 125, 99, 95, 94, 88, 85, 70, 69, 84, 97, 70, 79, + 80, 76, 87, 108, 90, 93, 112, 98, 106, 105, 123, 118, 108, 99, 89, 82, + 100, 91, 96, 105, 111, 113, 113, 113, 116, 120, 126, 124, 126, 126, 126, 119, + 126, 126, 65, 15, 7, 14, 19, 19, 62, 30, 28, 68, 38, 62, 62, 62, + 25, 3, 69, 112, 126, 126, 126, 126, 126, 126, 65, 44, 26, 17, 9, 15, + 2, 67, 68, 0, 2, 8, 21, 12, 33, 22, 13, 22, 22, 25, 26, 13, + 28, 21, 65, 7, 65, 81, 88, 95, 101, 126, 126, 111, 93, 69, 0, 62, + 62, 62, 62}, + + }, + +}; diff --git a/common/svc/isvc_cabac_tables.h b/common/svc/isvc_cabac_tables.h new file mode 100644 index 0000000..6557b07 --- /dev/null +++ b/common/svc/isvc_cabac_tables.h @@ -0,0 +1,57 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file isvc_cabac_tables.h +* +* @brief +* This file contains macros and extern declarations of the SVC +* cabac tables +* +* @author +* Ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef _ISVC_CABAC_TABLES_H_ +#define _ISVC_CABAC_TABLES_H_ + +#include "ih264_cabac_tables.h" +/** +****************************************************************************** +* @brief number of cabac contexts in the SVC tables (context indices 0-466) +****************************************************************************** +*/ +#define NUM_SVC_CABAC_CTXTS 467 + +extern const UWORD32 (*gau4_isvc_cabac_table)[4]; + +/*****************************************************************************/ +/* Cabac tables for context initialization depending upon type of Slice, */ +/* cabac init Idc value and Qp. 
*/ +/*****************************************************************************/ +extern const UWORD8 gau1_isvc_cabac_ctxt_init_table[NUM_CAB_INIT_IDC_PLUS_ONE][QP_RANGE] + [NUM_SVC_CABAC_CTXTS]; + +#endif diff --git a/common/svc/isvc_common_tables.c b/common/svc/isvc_common_tables.c new file mode 100644 index 0000000..580a2e4 --- /dev/null +++ b/common/svc/isvc_common_tables.c @@ -0,0 +1,81 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvc_common_tables.c +* +* @brief +* Contains common global tables +* +* @author +* Harish M +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "isvc_defs.h" +#include "isvc_macros.h" +#include "isvc_structs.h" +#include "ih264_common_tables.h" +#include "isvc_common_tables.h" + +/*****************************************************************************/ +/* Extern global definitions */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @brief while encoding, based on the input configuration parameters, the 
+ * level of the bitstream is computed based on the table below. + * input : table_idx + * output : level_idc or cpb size + * @remarks Table A-1 – level table limits + ****************************************************************************** + */ +const level_tables_t gas_isvc_lvl_tbl[16] = { + {IH264_LEVEL_10, 1485, 99, 396, 64, 175, 64}, + {IH264_LEVEL_1B, 1485, 99, 396, 128, 350, 64}, + {IH264_LEVEL_11, 3000, 396, 900, 192, 500, 128}, + {IH264_LEVEL_12, 6000, 396, 2376, 384, 1000, 128}, + {IH264_LEVEL_13, 11880, 396, 2376, 768, 2000, 128}, + {IH264_LEVEL_20, 11880, 396, 2376, 2000, 2000, 128}, + {IH264_LEVEL_21, 19800, 792, 4752, 4000, 4000, 256}, + {IH264_LEVEL_22, 20250, 1620, 8100, 4000, 4000, 256}, + {IH264_LEVEL_30, 40500, 1620, 8100, 10000, 10000, 256}, + {IH264_LEVEL_31, 108000, 3600, 18000, 14000, 14000, 512}, + {IH264_LEVEL_32, 216000, 5120, 20480, 20000, 20000, 512}, + {IH264_LEVEL_40, 245760, 8192, 32768, 20000, 25000, 512}, + {IH264_LEVEL_41, 245760, 8192, 32768, 50000, 62500, 512}, + {IH264_LEVEL_42, 522240, 8704, 34816, 50000, 62500, 512}, + {IH264_LEVEL_50, 589824, 22080, 110400, 135000, 135000, 512}, + {IH264_LEVEL_51, 983040, 36864, 184320, 240000, 240000, 512}, +}; diff --git a/common/svc/isvc_common_tables.h b/common/svc/isvc_common_tables.h new file mode 100644 index 0000000..ed4c8f4 --- /dev/null +++ b/common/svc/isvc_common_tables.h @@ -0,0 +1,50 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvc_common_tables.h +* +* @brief +* Common tables +* +* @author +* Harish +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVC_COMMON_TABLES_H_ +#define _ISVC_COMMON_TABLES_H_ + +/* Dependencies of ih264_common_tables.h */ +#include "ih264_defs.h" +#include "ih264_structs.h" + +#include "ih264_common_tables.h" + +extern const level_tables_t gas_isvc_lvl_tbl[16]; + +#endif diff --git a/common/svc/isvc_defs.h b/common/svc/isvc_defs.h new file mode 100644 index 0000000..0b55ac4 --- /dev/null +++ b/common/svc/isvc_defs.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvc_defs.h +* +* @brief +* Contains macro definitions, and other typedefs used for SVC encoding +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVC_DEFS_H_ +#define _ISVC_DEFS_H_ + +#define MAX_NUM_TEMPORAL_LAYERS 3 + +#define MAX_NUM_SPATIAL_LAYERS 3 + +#define MAX_VUI_EXT_NUM_ENTRIES (MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS) + +#define SVC_INTER_MB (1 << 0) /*!< P or B MBs decoded or inferred */ + +#define SVC_INTRA_MB (1 << 1) /*!< Intra MBs other than IPCM and I_BL */ + +#define SVC_IPCM_MB (1 << 2) /*!< IPCM_MB decoded or inferred */ + +#define SVC_IBL_MB (1 << 3) /*!< I_BL MB always inferred */ + +#define SVC_INTRA_INTER_MB \ + (1 << 4) /*!< Intra Inter MB will have an alternate prediction \ + process*/ + +#define MB_WIDTH_SHIFT 4 + +#define MB_HEIGHT_SHIFT 4 + +#define UV 1 + +#define NUM_SP_COMPONENTS 2 + +#define NUM_COMPONENTS 3 + +#define SVC_EXTRACT_MB_MODE(x) ((x) &0x1F) + +#define GET_BIT_TX_SIZE(x, y) ((x) & (1 << (7 - (y)))) + +typedef enum SVC_PROFILES_T +{ + IH264_SCALABLE_BASELINE = 83, + IH264_SCALABLE_HIGH_PROFILE = 86 +} SVC_PROFILES_T; + +typedef enum PRED_MODE_T +{ + L0 = 0, + L1 = 1, + BI = 2, + NUM_PRED_DIRS = 2, /* NOTE(review): shares the value 2 with BI; presumably the count of L0/L1 directions - confirm */ + INVALID_PRED_MODE = 4, +} PRED_MODE_T; + +#endif diff --git a/common/svc/isvc_inter_pred_filters.h b/common/svc/isvc_inter_pred_filters.h new file mode 100644 index 0000000..7573560 --- /dev/null +++ b/common/svc/isvc_inter_pred_filters.h @@ -0,0 +1,219 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** + ******************************************************************************* + * @file + * isvc_inter_pred_filters.h + * + * @brief + * Declarations of functions used for inter prediction + * + * @author + * Ittiam + * + * @par List of Functions: + * -ih264_inter_pred_luma_copy + * -ih264_interleave_copy + * -ih264_inter_pred_luma_horz + * -ih264_inter_pred_luma_vert + * -ih264_inter_pred_luma_horz_hpel_vert_hpel + * -ih264_inter_pred_luma_vert_qpel + * -ih264_inter_pred_luma_horz_qpel + * -ih264_inter_pred_luma_horz_qpel_vert_qpel + * -ih264_inter_pred_luma_horz_qpel_vert_hpel + * -ih264_inter_pred_luma_horz_hpel_vert_qpel + * -ih264_inter_pred_luma_bilinear + * -ih264_inter_pred_chroma + * -ih264_inter_pred_luma_copy_a9q + * -ih264_interleave_copy_a9 + * -ih264_inter_pred_luma_horz_a9q + * -ih264_inter_pred_luma_vert_a9q + * -ih264_inter_pred_luma_bilinear_a9q + * -ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q + * -ih264_inter_pred_luma_horz_qpel_a9q + * -ih264_inter_pred_luma_vert_qpel_a9q + * -ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q + * -ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q + * -ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q + * -ih264_inter_pred_chroma_a9q + * -ih264_inter_pred_luma_copy_av8 + * -ih264_interleave_copy_av8 + * -ih264_inter_pred_luma_horz_av8 + * -ih264_inter_pred_luma_vert_av8 + * -ih264_inter_pred_luma_bilinear_av8 + * 
-ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 + * -ih264_inter_pred_luma_horz_qpel_av8 + * -ih264_inter_pred_luma_vert_qpel_av8 + * -ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 + * -ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 + * -ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 + * -ih264_inter_pred_chroma_av8 + * -ih264_inter_pred_chroma_dx_zero_av8 + * -ih264_inter_pred_chroma_dy_zero_av8 + * -ih264_inter_pred_luma_copy_ssse3 + * -ih264_inter_pred_luma_copy_ssse3 + * -ih264_inter_pred_luma_horz_ssse3 + * -ih264_inter_pred_luma_vert_ssse3 + * -ih264_inter_pred_luma_bilinear_ssse3 + * -ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3 + * -ih264_inter_pred_luma_horz_qpel_ssse3 + * -ih264_inter_pred_luma_vert_qpel_ssse3 + * -ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3 + * -ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3 + * -ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3 + * -ih264_inter_pred_chroma_ssse3 + * + * @remarks + * None + * + ******************************************************************************* + */ + +#ifndef _ISVC_INTER_PRED_FILTERS_H_ +#define _ISVC_INTER_PRED_FILTERS_H_ + +/*****************************************************************************/ +/* Constant Data variables */ +/*****************************************************************************/ + +extern const WORD32 ih264_g_six_tap[3]; /* coefficients for 6 tap filtering*/ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +typedef void FT_INTER_PRED_LUMA(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd, + WORD32 ht, WORD32 wd, UWORD8 *pu1_tmp, WORD32 dydx); + +typedef void FT_INTERLEAVE_COPY(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd, + WORD32 ht, WORD32 wd); + +typedef void FT_INTER_PRED_LUMA_BILINEAR(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, + WORD32 src_strd1, 
WORD32 src_strd2, WORD32 dst_strd, + WORD32 height, WORD32 width); + +typedef void FT_INTER_PRED_CHROMA(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, + WORD32 dst_strd, WORD32 dx, WORD32 dy, WORD32 ht, WORD32 wd); + +/* No NEON Declarations */ + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy; + +FT_INTERLEAVE_COPY ih264_interleave_copy; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel; + +FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear; + +FT_INTER_PRED_CHROMA ih264_inter_pred_chroma; + +/* A9 NEON Declarations */ +FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_a9q; + +FT_INTERLEAVE_COPY ih264_interleave_copy_a9; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_a9q; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_a9q; + +FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear_a9q; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_a9q; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_a9q; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q; + +FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_a9q; + +/* AV8 NEON Declarations */ +FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_av8; + +FT_INTERLEAVE_COPY ih264_interleave_copy_av8; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_av8; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_av8; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_av8; + 
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_av8; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_av8; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_av8; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_av8; + +FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_av8; + +FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_dx_zero_av8; + +FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_dy_zero_av8; + +/* SSSE3 Intrinsic Declarations */ +FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_ssse3; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_ssse3; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_ssse3; + +FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear_ssse3; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_ssse3; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_ssse3; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3; + +FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3; + +FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_ssse3; + +/** Nothing past this point */ + +#endif diff --git a/common/svc/isvc_intra_resample.c b/common/svc/isvc_intra_resample.c new file mode 100644 index 0000000..a643b36 --- /dev/null +++ b/common/svc/isvc_intra_resample.c @@ -0,0 +1,3257 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/*! + ************************************************************************** + * \file isvc_intra_resample.c + * + * \brief + * Contains routines that perform intra resampling for SVC + * + * Detailed_description + * + * \date + * + * + * \author + ************************************************************************** + */ +#include +#include + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "isvc_macros.h" +#include "ih264_platform_macros.h" +#include "isvc_intra_resample.h" +#include "ih264_debug.h" +#include "isvc_defs.h" +#include "isvc_structs.h" + +#define NUM_SEGMENTS 16 +#define NUM_INTRA_SAMP_FXNS 32 +#define INTERPOL_FILTER_SIZE_LUMA 64 +#define INTERPOL_FILTER_SIZE_CHROMA 32 + +typedef void(PF_INTRA_SAMP_PADDING)(WORD32 i4_x, WORD32 i4_y, WORD8 i1_xd_index, WORD8 i1_yd_index, + UWORD8 u1_seg_wd, UWORD8 u1_seg_ht, UWORD8 *pu1_refarray_1, + UWORD8 *pu1_refarray_2, WORD32 i4_refarray_stride, + WORD32 i4_mb_adjoin_x, WORD32 i4_mb_adjoin_y, + WORD32 i4_corner_pixel_available); + +static const WORD8 g_ai1_interp_filter_luma[INTERPOL_FILTER_SIZE_LUMA] = { + 0, -1, -2, -3, -3, -4, -4, -3, -3, -3, -2, -1, -1, -1, -1, -1, 32, 32, 31, 30, 28, 26, + 24, 22, 19, 16, 14, 11, 8, 6, 4, 2, 0, 2, 4, 6, 8, 11, 14, 16, 19, 22, 24, 26, + 28, 30, 31, 32, 0, -1, -1, -1, -1, -1, -2, -3, -3, -3, -4, -4, -3, -3, -2, -1}; + +static const UWORD8
g_au1_interp_filter_chroma[INTERPOL_FILTER_SIZE_CHROMA] = { + 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, + 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}; + +static const UWORD32 gu4_valid_segs_lookup[NUM_SEGMENTS] = { + 0x0F000000, 0xCF000000, 0x3F000000, 0xFF000000, 0x0F000000, 0xCF000000, 0x3F000000, 0xFF000000, + 0x0F000000, 0x8F000000, 0x6F000000, 0xEF000000, 0x1F000000, 0x9F000000, 0x7F000000, 0xFF000000}; + +/*****************************************************************************/ +/* */ +/* Function Name : isvc_copy_data */ +/* */ +/* Description : this module copies the data from source to destination */ +/* the amount of data to be copied is passed as input */ +/* */ +/* Inputs : pu1_src : pointer to the source buffer */ +/* u2_src_stride : source buffer stride */ +/* pu1_dst : pointer to the destination buffer */ +/* u2_dst_stride : destination buffer stride */ +/* u4_num_bytes : number of bytes to be copied */ +/* u4_num_lines : number of lines to be copied */ +/* Globals : none */ +/* Processing : it does a memcpy from source to destination */ +/* */ +/* Outputs : none */ +/* Returns : none */ +/* Issues : both buffers are assumed to be 2-D buffers */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 29 04 2009 vijayakumar creation */ +/* */ +/*****************************************************************************/ +/** \brief performs the 2-D memory transfer */ +static void isvc_copy_data(UWORD8 *pu1_src, WORD32 i4_src_stride, UWORD8 *pu1_dst, + WORD32 i4_dst_stride, WORD32 i4_num_bytes, WORD32 i4_num_lines) +{ + WORD32 i4_vert_lines; + ASSERT(NULL != pu1_src); + ASSERT(NULL != pu1_dst); + + for(i4_vert_lines = 0; i4_vert_lines < i4_num_lines; i4_vert_lines++) + { + memcpy(pu1_dst, pu1_src, i4_num_bytes); + pu1_src += i4_src_stride; + pu1_dst += i4_dst_stride; + } +} + +static void isvc_copy_data_semiplanr(UWORD8 *pu1_src, WORD32 i4_src_stride, UWORD8 
*pu1_dst1, + UWORD8 *pu1_dst2, WORD32 i4_dst_stride, WORD32 i4_num_bytes, + WORD32 i4_num_lines) +{ + WORD32 i4_vert_lines, u4_i; + + ASSERT(NULL != pu1_src); + ASSERT(NULL != pu1_dst1); + ASSERT(NULL != pu1_dst2); + + for(i4_vert_lines = 0; i4_vert_lines < i4_num_lines; i4_vert_lines++) + { + for(u4_i = 0; u4_i < i4_num_bytes; u4_i++) + { + *(pu1_dst1 + u4_i) = *(pu1_src + (2 * u4_i)); + *(pu1_dst2 + u4_i) = *(pu1_src + (2 * u4_i) + 1); + } + pu1_src += i4_src_stride; + pu1_dst1 += i4_dst_stride; + pu1_dst2 += i4_dst_stride; + } +} + +static void isvc_left_right_padding(WORD32 i4_x, WORD32 i4_y, WORD8 i1_xd_index, WORD8 i1_yd_index, + UWORD8 u1_seg_wd, UWORD8 u1_seg_ht, UWORD8 *pu1_refarray_1, + UWORD8 *pu1_refarray_2, WORD32 i4_refarray_stride, + WORD32 i4_mb_adjoin_x, WORD32 i4_mb_adjoin_y, + WORD32 i4_corner_pixel_available) +{ + WORD32 i4_idx_i; + UWORD8 *pu1_src, *pu1_dst; + + UNUSED(i1_yd_index); + UNUSED(pu1_refarray_2); + UNUSED(i4_mb_adjoin_x); + UNUSED(i4_mb_adjoin_y); + UNUSED(i4_corner_pixel_available); + + pu1_dst = pu1_refarray_1 + i4_x + (i4_y * i4_refarray_stride); + pu1_src = pu1_dst + i1_xd_index; + + i1_xd_index = MIN(i1_xd_index, MAX_PIX_FILL_LUMA); + u1_seg_wd = MIN(u1_seg_wd, MAX_PIX_FILL_LUMA); + pu1_dst = pu1_src - i1_xd_index; + + for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++) + { + memset(pu1_dst, *pu1_src, u1_seg_wd); + pu1_dst += i4_refarray_stride; + pu1_src += i4_refarray_stride; + } +} + +static void isvc_left_right_padding_chroma(WORD32 i4_x, WORD32 i4_y, WORD8 i1_xd_index, + WORD8 i1_yd_index, UWORD8 u1_seg_wd, UWORD8 u1_seg_ht, + UWORD8 *pu1_refarray_1, UWORD8 *pu1_refarray_2, + WORD32 i4_refarray_stride, WORD32 i4_mb_adjoin_x, + WORD32 i4_mb_adjoin_y, WORD32 i4_corner_pixel_available) +{ + WORD32 i4_idx_i; + UWORD8 *pu1_src_cb, *pu1_dst_cb; + UWORD8 *pu1_src_cr, *pu1_dst_cr; + WORD32 i4_tmp; + + UNUSED(i1_yd_index); + UNUSED(i4_mb_adjoin_x); + UNUSED(i4_mb_adjoin_y); + UNUSED(i4_corner_pixel_available); + + i4_tmp = i4_x + 
(i4_y * i4_refarray_stride); + pu1_dst_cb = pu1_refarray_1 + i4_tmp; + pu1_src_cb = pu1_dst_cb + i1_xd_index; + + pu1_dst_cr = pu1_refarray_2 + i4_tmp; + pu1_src_cr = pu1_dst_cr + i1_xd_index; + + i1_xd_index = MIN(i1_xd_index, MAX_PIX_FILL_CHROMA); + u1_seg_wd = MIN(u1_seg_wd, MAX_PIX_FILL_CHROMA); + pu1_dst_cb = pu1_src_cb - i1_xd_index; + pu1_dst_cr = pu1_src_cr - i1_xd_index; + + for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++) + { + memset(pu1_dst_cb, *pu1_src_cb, u1_seg_wd); + pu1_dst_cb += i4_refarray_stride; + pu1_src_cb += i4_refarray_stride; + + memset(pu1_dst_cr, *pu1_src_cr, u1_seg_wd); + pu1_dst_cr += i4_refarray_stride; + pu1_src_cr += i4_refarray_stride; + } +} + +static void isvc_top_bot_padding(WORD32 i4_x, WORD32 i4_y, WORD8 i1_xd_index, WORD8 i1_yd_index, + UWORD8 u1_seg_wd, UWORD8 u1_seg_ht, UWORD8 *pu1_refarray_1, + UWORD8 *pu1_refarray_2, WORD32 i4_refarray_stride, + WORD32 i4_mb_adjoin_x, WORD32 i4_mb_adjoin_y, + WORD32 i4_corner_pixel_available) +{ + WORD32 i4_idx_i; + UWORD8 *pu1_src, *pu1_dst; + + UNUSED(i1_xd_index); + UNUSED(pu1_refarray_2); + UNUSED(i4_mb_adjoin_x); + UNUSED(i4_mb_adjoin_y); + UNUSED(i4_corner_pixel_available); + + pu1_dst = pu1_refarray_1 + i4_x + (i4_y * i4_refarray_stride); + pu1_src = pu1_dst + (i1_yd_index * i4_refarray_stride); + + i1_yd_index = MIN(i1_yd_index, MAX_PIX_FILL_LUMA); + u1_seg_ht = MIN(u1_seg_ht, MAX_PIX_FILL_LUMA); + pu1_dst = pu1_src - (i1_yd_index * i4_refarray_stride); + + for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++) + { + memcpy(pu1_dst, pu1_src, u1_seg_wd); + pu1_dst += i4_refarray_stride; + } +} + +static void isvc_top_bot_padding_chroma(WORD32 i4_x, WORD32 i4_y, WORD8 i1_xd_index, + WORD8 i1_yd_index, UWORD8 u1_seg_wd, UWORD8 u1_seg_ht, + UWORD8 *pu1_refarray_1, UWORD8 *pu1_refarray_2, + WORD32 i4_refarray_stride, WORD32 i4_mb_adjoin_x, + WORD32 i4_mb_adjoin_y, WORD32 i4_corner_pixel_available) +{ + WORD32 i4_idx_i; + UWORD8 *pu1_src_cb, *pu1_dst_cb; + UWORD8 *pu1_src_cr, 
*pu1_dst_cr; + WORD32 i4_tmp; + + UNUSED(i1_xd_index); + UNUSED(i4_mb_adjoin_x); + UNUSED(i4_mb_adjoin_y); + UNUSED(i4_corner_pixel_available); + + i4_tmp = i4_x + (i4_y * i4_refarray_stride); + pu1_dst_cb = pu1_refarray_1 + i4_tmp; + pu1_dst_cr = pu1_refarray_2 + i4_tmp; + + i4_tmp = (i1_yd_index * i4_refarray_stride); + pu1_src_cb = pu1_dst_cb + i4_tmp; + pu1_src_cr = pu1_dst_cr + i4_tmp; + + i1_yd_index = MIN(i1_yd_index, MAX_PIX_FILL_CHROMA); + u1_seg_ht = MIN(u1_seg_ht, MAX_PIX_FILL_CHROMA); + + i4_tmp = (i1_yd_index * i4_refarray_stride); + pu1_dst_cb = pu1_src_cb - i4_tmp; + + pu1_dst_cr = pu1_src_cr - i4_tmp; + + for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++) + { + memcpy(pu1_dst_cb, pu1_src_cb, u1_seg_wd); + pu1_dst_cb += i4_refarray_stride; + + memcpy(pu1_dst_cr, pu1_src_cr, u1_seg_wd); + pu1_dst_cr += i4_refarray_stride; + } +} + +static void isvc_diag_reconstruction(WORD32 i4_x, WORD32 i4_y, WORD8 i1_xd_index, WORD8 i1_yd_index, + UWORD8 u1_seg_wd, UWORD8 u1_seg_ht, UWORD8 *pu1_refarray_1, + UWORD8 *pu1_refarray_2, WORD32 i4_refarray_stride, + WORD32 i4_mb_adjoin_x, WORD32 i4_mb_adjoin_y, + WORD32 i4_corner_pixel_available) +{ + WORD32 i4_i; + UWORD8 *pu1_src_1, *pu1_src_2, *pu1_dst; + UWORD8 u1_filter_delay_buf[18]; + UWORD8 u1_out_buf[16]; + WORD32 i4_width, i4_height; + WORD32 i4_x_off, i4_y_off; + WORD32 i4_block_size = BLK_SIZE; + + UNUSED(pu1_refarray_2); + + pu1_dst = pu1_refarray_1 + i4_x + (i4_y * i4_refarray_stride); + pu1_src_1 = pu1_dst + i1_xd_index; + pu1_src_2 = pu1_dst + (i1_yd_index * i4_refarray_stride); + + i4_width = MAX(u1_seg_wd, (((i4_mb_adjoin_x >> 3) ^ 1) * i4_block_size)); + i4_height = MAX(u1_seg_ht, (((i4_mb_adjoin_y >> 4) ^ 1) * i4_block_size)); + + i4_x_off = (i4_width - u1_seg_wd); + i4_y_off = (i4_height - u1_seg_ht); + + if(i1_xd_index > 0 && i1_yd_index > 0) + { + /* Quadrant 1 Processing */ + + /* load the pixel in the filter delay buffer */ + memcpy(&u1_filter_delay_buf[0], pu1_src_2, (i4_width + 1)); + for(i4_i 
= i4_height; i4_i > 0; i4_i--) + { + u1_filter_delay_buf[i4_width + i4_i] = *pu1_src_1; + pu1_src_1 += i4_refarray_stride; + } + + if(0 == i4_corner_pixel_available) + { + /* interpolate the unavailable corner pixel */ + u1_filter_delay_buf[i4_width] = + (u1_filter_delay_buf[i4_width - 1] + u1_filter_delay_buf[i4_width + 1] + 1) >> 1; + } + + for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++) + { + /* get the filtered output */ + u1_out_buf[i4_i] = ((u1_filter_delay_buf[i4_i]) + (u1_filter_delay_buf[i4_i + 1] * 2) + + (u1_filter_delay_buf[i4_i + 2]) + 2) >> + 2; + } + + /* fill the segment with diagonal reconstructed output */ + for(i4_i = 1; i4_i <= u1_seg_ht; i4_i++) + { + memcpy(pu1_dst, &u1_out_buf[i4_height - i4_i], u1_seg_wd); + pu1_dst += i4_refarray_stride; + } + } + else if(i1_xd_index < 0 && i1_yd_index > 0) + { + /* Quadrant 2 Processing */ + /* load the pixel in the filter delay buffer */ + for(i4_i = 0; i4_i < (i4_height + 1); i4_i++) + { + u1_filter_delay_buf[i4_i] = *pu1_src_1; + pu1_src_1 += i4_refarray_stride; + } + + pu1_src_2 -= i4_x_off; + memcpy(&u1_filter_delay_buf[i4_i], pu1_src_2, i4_width); + + if(0 == i4_corner_pixel_available) + { + /* interpolate the unavailable corner pixel */ + u1_filter_delay_buf[i4_i - 1] = + (u1_filter_delay_buf[i4_i] + u1_filter_delay_buf[i4_i - 2] + 1) >> 1; + } + + for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++) + { + /* get the filtered output */ + u1_out_buf[i4_i] = ((u1_filter_delay_buf[i4_i]) + (u1_filter_delay_buf[i4_i + 1] * 2) + + (u1_filter_delay_buf[i4_i + 2]) + 2) >> + 2; + } + + /* fill the segment with diagonal reconstructed output */ + for(i4_i = 0; i4_i < u1_seg_ht; i4_i++) + { + memcpy(pu1_dst, &u1_out_buf[i4_x_off + i4_i], u1_seg_wd); + pu1_dst += i4_refarray_stride; + } + } + else if(i1_xd_index > 0 && i1_yd_index < 0) + { + /* Quadrant 3 Processing */ + /* load the pixel in the filter delay buffer */ + memcpy(&u1_filter_delay_buf[0], pu1_src_2, (i4_width + 1)); + + pu1_src_1 -= 
(i4_y_off * i4_refarray_stride); + for(i4_i = 1; i4_i <= i4_height; i4_i++) + { + u1_filter_delay_buf[i4_width + i4_i] = *pu1_src_1; + pu1_src_1 += i4_refarray_stride; + } + + if(0 == i4_corner_pixel_available) + { + /* interpolate the unavailable corner pixel */ + u1_filter_delay_buf[i4_width] = + (u1_filter_delay_buf[i4_width - 1] + u1_filter_delay_buf[i4_width + 1] + 1) >> 1; + } + + for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++) + { + /* get the filtered output */ + u1_out_buf[i4_i] = ((u1_filter_delay_buf[i4_i]) + (u1_filter_delay_buf[i4_i + 1] * 2) + + (u1_filter_delay_buf[i4_i + 2]) + 2) >> + 2; + } + + /* fill the segment with diagonal reconstructed output */ + for(i4_i = 0; i4_i < u1_seg_ht; i4_i++) + { + memcpy(pu1_dst, &u1_out_buf[i4_y_off + i4_i], u1_seg_wd); + pu1_dst += i4_refarray_stride; + } + } + else + { + /* Quadrant 4 Processing */ + /* load the pixel in the filter delay buffer */ + pu1_src_1 += ((u1_seg_ht - 1) * i4_refarray_stride); + for(i4_i = 0; i4_i <= i4_height; i4_i++) + { + u1_filter_delay_buf[i4_i] = *pu1_src_1; + pu1_src_1 -= i4_refarray_stride; + } + + pu1_src_2 -= i4_x_off; + memcpy(&u1_filter_delay_buf[i4_i], pu1_src_2, i4_width); + + if(0 == i4_corner_pixel_available) + { + /* interpolate the unavailable corner pixel */ + u1_filter_delay_buf[i4_i - 1] = + (u1_filter_delay_buf[i4_i] + u1_filter_delay_buf[i4_i - 2] + 1) >> 1; + } + + for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++) + { + /* get the filtered output */ + u1_out_buf[i4_i] = ((u1_filter_delay_buf[i4_i]) + (u1_filter_delay_buf[i4_i + 1] * 2) + + (u1_filter_delay_buf[i4_i + 2]) + 2) >> + 2; + } + + /* fill the segment with diagonal reconstructed output */ + for(i4_i = 1; i4_i <= u1_seg_ht; i4_i++) + { + memcpy(pu1_dst, &u1_out_buf[(u1_seg_ht + i4_x_off) - i4_i], u1_seg_wd); + pu1_dst += i4_refarray_stride; + } + } +} + +static void isvc_diag_reconstruction_chroma(WORD32 i4_x, WORD32 i4_y, WORD8 i1_xd_index, + WORD8 i1_yd_index, UWORD8 u1_seg_wd, UWORD8 
u1_seg_ht, + UWORD8 *pu1_refarray_1, UWORD8 *pu1_refarray_2, + WORD32 i4_refarray_stride, WORD32 i4_mb_adjoin_x, + WORD32 i4_mb_adjoin_y, WORD32 i4_corner_pixel_available) +{ + WORD32 i4_i; + UWORD8 u1_filter_delay_buf_cb[18], u1_filter_delay_buf_cr[18]; + UWORD8 u1_out_buf_cb[16], u1_out_buf_cr[16]; + WORD32 i4_width, i4_height; + WORD32 i4_x_off, i4_y_off; + WORD32 i4_block_size = BLK_SIZE >> 1; + UWORD8 *pu1_src_1_cb, *pu1_src_2_cb, *pu1_dst_cb; + UWORD8 *pu1_src_1_cr, *pu1_src_2_cr, *pu1_dst_cr; + WORD32 i4_tmp; + + i4_tmp = i4_x + (i4_y * i4_refarray_stride); + pu1_dst_cb = pu1_refarray_1 + i4_tmp; + pu1_dst_cr = pu1_refarray_2 + i4_tmp; + + pu1_src_1_cb = pu1_dst_cb + i1_xd_index; + pu1_src_1_cr = pu1_dst_cr + i1_xd_index; + + i4_tmp = (i1_yd_index * i4_refarray_stride); + pu1_src_2_cb = pu1_dst_cb + i4_tmp; + pu1_src_2_cr = pu1_dst_cr + i4_tmp; + + i4_width = MAX(u1_seg_wd, (((i4_mb_adjoin_x >> 3) ^ 1) * i4_block_size)); + i4_height = MAX(u1_seg_ht, (((i4_mb_adjoin_y >> 4) ^ 1) * i4_block_size)); + + i4_x_off = (i4_width - u1_seg_wd); + i4_y_off = (i4_height - u1_seg_ht); + + if(i1_xd_index < 0 && i1_yd_index > 0) + { + /* Quadrant 1 Processing */ + + /* load the pixel in the filter delay buffer */ + for(i4_i = 0; i4_i < (i4_height + 1); i4_i++) + { + u1_filter_delay_buf_cb[i4_i] = *pu1_src_1_cb; + pu1_src_1_cb += i4_refarray_stride; + + u1_filter_delay_buf_cr[i4_i] = *pu1_src_1_cr; + pu1_src_1_cr += i4_refarray_stride; + } + + pu1_src_2_cb -= i4_x_off; + pu1_src_2_cr -= i4_x_off; + + memcpy(&u1_filter_delay_buf_cb[i4_i], pu1_src_2_cb, i4_width); + memcpy(&u1_filter_delay_buf_cr[i4_i], pu1_src_2_cr, i4_width); + + if(0 == i4_corner_pixel_available) + { + /* interpolate the unavailable corner pixel */ + u1_filter_delay_buf_cb[i4_i - 1] = + (u1_filter_delay_buf_cb[i4_i] + u1_filter_delay_buf_cb[i4_i - 2] + 1) >> 1; + + u1_filter_delay_buf_cr[i4_i - 1] = + (u1_filter_delay_buf_cr[i4_i] + u1_filter_delay_buf_cr[i4_i - 2] + 1) >> 1; + } + + for(i4_i = 0; i4_i < 
(i4_width + i4_height - 1); i4_i++) + { + /* get the filtered output */ + u1_out_buf_cb[i4_i] = + ((u1_filter_delay_buf_cb[i4_i]) + (u1_filter_delay_buf_cb[i4_i + 1] * 2) + + (u1_filter_delay_buf_cb[i4_i + 2]) + 2) >> + 2; + + u1_out_buf_cr[i4_i] = + ((u1_filter_delay_buf_cr[i4_i]) + (u1_filter_delay_buf_cr[i4_i + 1] * 2) + + (u1_filter_delay_buf_cr[i4_i + 2]) + 2) >> + 2; + } + + /* fill the segment with diagonal reconstructed output */ + for(i4_i = 0; i4_i < u1_seg_ht; i4_i++) + { + memcpy(pu1_dst_cb, &u1_out_buf_cb[i4_x_off + i4_i], u1_seg_wd); + pu1_dst_cb += i4_refarray_stride; + + memcpy(pu1_dst_cr, &u1_out_buf_cr[i4_x_off + i4_i], u1_seg_wd); + pu1_dst_cr += i4_refarray_stride; + } + } + else if(i1_xd_index > 0 && i1_yd_index > 0) + { + /* Quadrant 2 Processing */ + + /* load the pixel in the filter delay buffer */ + memcpy(&u1_filter_delay_buf_cb[0], pu1_src_2_cb, (i4_width + 1)); + memcpy(&u1_filter_delay_buf_cr[0], pu1_src_2_cr, (i4_width + 1)); + + for(i4_i = i4_height; i4_i > 0; i4_i--) + { + u1_filter_delay_buf_cb[i4_width + i4_i] = *pu1_src_1_cb; + pu1_src_1_cb += i4_refarray_stride; + + u1_filter_delay_buf_cr[i4_width + i4_i] = *pu1_src_1_cr; + pu1_src_1_cr += i4_refarray_stride; + } + + if(0 == i4_corner_pixel_available) + { + /* interpolate the unavailable corner pixel */ + u1_filter_delay_buf_cb[i4_width] = + (u1_filter_delay_buf_cb[i4_width - 1] + u1_filter_delay_buf_cb[i4_width + 1] + 1) >> + 1; + + u1_filter_delay_buf_cr[i4_width] = + (u1_filter_delay_buf_cr[i4_width - 1] + u1_filter_delay_buf_cr[i4_width + 1] + 1) >> + 1; + } + + for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++) + { + /* get the filtered output */ + u1_out_buf_cb[i4_i] = + ((u1_filter_delay_buf_cb[i4_i]) + (u1_filter_delay_buf_cb[i4_i + 1] * 2) + + (u1_filter_delay_buf_cb[i4_i + 2]) + 2) >> + 2; + + u1_out_buf_cr[i4_i] = + ((u1_filter_delay_buf_cr[i4_i]) + (u1_filter_delay_buf_cr[i4_i + 1] * 2) + + (u1_filter_delay_buf_cr[i4_i + 2]) + 2) >> + 2; + } + + /* fill the 
segment with diagonal reconstructed output */ + for(i4_i = 1; i4_i <= u1_seg_ht; i4_i++) + { + memcpy(pu1_dst_cb, &u1_out_buf_cb[i4_height - i4_i], u1_seg_wd); + pu1_dst_cb += i4_refarray_stride; + + memcpy(pu1_dst_cr, &u1_out_buf_cr[i4_height - i4_i], u1_seg_wd); + pu1_dst_cr += i4_refarray_stride; + } + } + else if(i1_xd_index > 0 && i1_yd_index < 0) + { + /* Quadrant 3 Processing */ + + /* load the pixel in the filter delay buffer */ + memcpy(&u1_filter_delay_buf_cb[0], pu1_src_2_cb, (i4_width + 1)); + memcpy(&u1_filter_delay_buf_cr[0], pu1_src_2_cr, (i4_width + 1)); + + i4_tmp = (i4_y_off * i4_refarray_stride); + pu1_src_1_cb -= i4_tmp; + pu1_src_1_cr -= i4_tmp; + for(i4_i = 1; i4_i <= i4_height; i4_i++) + { + u1_filter_delay_buf_cb[i4_width + i4_i] = *pu1_src_1_cb; + pu1_src_1_cb += i4_refarray_stride; + + u1_filter_delay_buf_cr[i4_width + i4_i] = *pu1_src_1_cr; + pu1_src_1_cr += i4_refarray_stride; + } + + if(0 == i4_corner_pixel_available) + { + /* interpolate the unavailable corner pixel */ + u1_filter_delay_buf_cb[i4_width] = + (u1_filter_delay_buf_cb[i4_width - 1] + u1_filter_delay_buf_cb[i4_width + 1] + 1) >> + 1; + + u1_filter_delay_buf_cr[i4_width] = + (u1_filter_delay_buf_cr[i4_width - 1] + u1_filter_delay_buf_cr[i4_width + 1] + 1) >> + 1; + } + + for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++) + { + /* get the filtered output */ + u1_out_buf_cb[i4_i] = + ((u1_filter_delay_buf_cb[i4_i]) + (u1_filter_delay_buf_cb[i4_i + 1] * 2) + + (u1_filter_delay_buf_cb[i4_i + 2]) + 2) >> + 2; + + u1_out_buf_cr[i4_i] = + ((u1_filter_delay_buf_cr[i4_i]) + (u1_filter_delay_buf_cr[i4_i + 1] * 2) + + (u1_filter_delay_buf_cr[i4_i + 2]) + 2) >> + 2; + } + + /* fill the segment with diagonal reconstructed output */ + for(i4_i = 0; i4_i < u1_seg_ht; i4_i++) + { + memcpy(pu1_dst_cb, &u1_out_buf_cb[i4_y_off + i4_i], u1_seg_wd); + pu1_dst_cb += i4_refarray_stride; + + memcpy(pu1_dst_cr, &u1_out_buf_cr[i4_y_off + i4_i], u1_seg_wd); + pu1_dst_cr += i4_refarray_stride; + 
} + } + else + { + /* Quadrant 4 Processing */ + + /* load the pixel in the filter delay buffer */ + i4_tmp = ((u1_seg_ht - 1) * i4_refarray_stride); + pu1_src_1_cb += i4_tmp; + pu1_src_1_cr += i4_tmp; + + for(i4_i = 0; i4_i <= i4_height; i4_i++) + { + u1_filter_delay_buf_cb[i4_i] = *pu1_src_1_cb; + pu1_src_1_cb -= i4_refarray_stride; + + u1_filter_delay_buf_cr[i4_i] = *pu1_src_1_cr; + pu1_src_1_cr -= i4_refarray_stride; + } + + pu1_src_2_cb -= i4_x_off; + pu1_src_2_cr -= i4_x_off; + + memcpy(&u1_filter_delay_buf_cb[i4_i], pu1_src_2_cb, i4_width); + memcpy(&u1_filter_delay_buf_cr[i4_i], pu1_src_2_cr, i4_width); + + if(0 == i4_corner_pixel_available) + { + /* interpolate the unavailable corner pixel */ + u1_filter_delay_buf_cb[i4_i - 1] = + (u1_filter_delay_buf_cb[i4_i] + u1_filter_delay_buf_cb[i4_i - 2] + 1) >> 1; + + u1_filter_delay_buf_cr[i4_i - 1] = + (u1_filter_delay_buf_cr[i4_i] + u1_filter_delay_buf_cr[i4_i - 2] + 1) >> 1; + } + + for(i4_i = 0; i4_i < (i4_width + i4_height - 1); i4_i++) + { + /* get the filtered output */ + u1_out_buf_cb[i4_i] = + ((u1_filter_delay_buf_cb[i4_i]) + (u1_filter_delay_buf_cb[i4_i + 1] * 2) + + (u1_filter_delay_buf_cb[i4_i + 2]) + 2) >> + 2; + + u1_out_buf_cr[i4_i] = + ((u1_filter_delay_buf_cr[i4_i]) + (u1_filter_delay_buf_cr[i4_i + 1] * 2) + + (u1_filter_delay_buf_cr[i4_i + 2]) + 2) >> + 2; + } + + /* fill the segment with diagonal reconstructed output */ + for(i4_i = 1; i4_i <= u1_seg_ht; i4_i++) + { + memcpy(pu1_dst_cb, &u1_out_buf_cb[(u1_seg_ht + i4_x_off) - i4_i], u1_seg_wd); + pu1_dst_cb += i4_refarray_stride; + + memcpy(pu1_dst_cr, &u1_out_buf_cr[(u1_seg_ht + i4_x_off) - i4_i], u1_seg_wd); + pu1_dst_cr += i4_refarray_stride; + } + } +} + +static void isvc_diag_padding(WORD32 i4_x, WORD32 i4_y, WORD8 i1_xd_index, WORD8 i1_yd_index, + UWORD8 u1_seg_wd, UWORD8 u1_seg_ht, UWORD8 *pu1_refarray_1, + UWORD8 *pu1_refarray_2, WORD32 i4_refarray_stride, + WORD32 i4_mb_adjoin_x, WORD32 i4_mb_adjoin_y, + WORD32 
i4_corner_pixel_available) + +{ /* isvc_diag_padding: fills a segment of pu1_refarray_1 with one replicated pixel */ + WORD32 i4_idx_i; + UWORD8 *pu1_src, *pu1_dst; + + UNUSED(pu1_refarray_2); + UNUSED(i4_mb_adjoin_x); + UNUSED(i4_mb_adjoin_y); + UNUSED(i4_corner_pixel_available); + + pu1_dst = pu1_refarray_1 + i4_x + (i4_y * i4_refarray_stride); /* segment origin at (i4_x, i4_y) */ + pu1_src = pu1_dst + i1_xd_index + (i1_yd_index * i4_refarray_stride); /* origin displaced by (i1_xd_index, i1_yd_index) */ + + i1_xd_index = MIN(i1_xd_index, MAX_PIX_FILL_LUMA); /* cap displacement and fill extent at MAX_PIX_FILL_LUMA */ + u1_seg_wd = MIN(u1_seg_wd, MAX_PIX_FILL_LUMA); + + i1_yd_index = MIN(i1_yd_index, MAX_PIX_FILL_LUMA); + u1_seg_ht = MIN(u1_seg_ht, MAX_PIX_FILL_LUMA); + pu1_dst = pu1_src - i1_xd_index - (i1_yd_index * i4_refarray_stride); /* destination recomputed from the source using the capped displacement */ + + for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++) + { + memset(pu1_dst, *pu1_src, u1_seg_wd); /* pu1_src is never advanced: every row receives the same byte */ + pu1_dst += i4_refarray_stride; + } +} + +static void isvc_diag_padding_chroma(WORD32 i4_x, WORD32 i4_y, WORD8 i1_xd_index, WORD8 i1_yd_index, + UWORD8 u1_seg_wd, UWORD8 u1_seg_ht, UWORD8 *pu1_refarray_1, + UWORD8 *pu1_refarray_2, WORD32 i4_refarray_stride, + WORD32 i4_mb_adjoin_x, WORD32 i4_mb_adjoin_y, + WORD32 i4_corner_pixel_available) +{ /* isvc_diag_padding_chroma: same replicated-pixel fill applied to two planes, pu1_refarray_1 (cb) and pu1_refarray_2 (cr) */ + WORD32 i4_idx_i; + UWORD8 *pu1_src_cb, *pu1_dst_cb; + UWORD8 *pu1_src_cr, *pu1_dst_cr; + WORD32 i4_tmp; + + UNUSED(i4_mb_adjoin_x); + UNUSED(i4_mb_adjoin_y); + UNUSED(i4_corner_pixel_available); + + i4_tmp = i4_x + (i4_y * i4_refarray_stride); /* one offset shared by both planes */ + pu1_dst_cb = pu1_refarray_1 + i4_tmp; + pu1_dst_cr = pu1_refarray_2 + i4_tmp; + + i4_tmp = i1_xd_index + (i1_yd_index * i4_refarray_stride); /* displacement from segment origin to the source pixel */ + pu1_src_cb = pu1_dst_cb + i4_tmp; + pu1_src_cr = pu1_dst_cr + i4_tmp; + + i1_xd_index = MIN(i1_xd_index, MAX_PIX_FILL_LUMA); /* NOTE(review): LUMA limit used in a chroma routine; isvc_left_right_padding_chroma and isvc_top_bot_padding_chroma clamp with MAX_PIX_FILL_CHROMA - confirm this is intended */ + u1_seg_wd = MIN(u1_seg_wd, MAX_PIX_FILL_LUMA); + + i1_yd_index = MIN(i1_yd_index, MAX_PIX_FILL_LUMA); + u1_seg_ht = MIN(u1_seg_ht, MAX_PIX_FILL_LUMA); + + i4_tmp = (i1_xd_index + (i1_yd_index * i4_refarray_stride)); /* capped displacement, used to step back from source to destination */ + pu1_dst_cb = pu1_src_cb - i4_tmp; + pu1_dst_cr = pu1_src_cr - i4_tmp; + + for(i4_idx_i = 0; i4_idx_i < u1_seg_ht; i4_idx_i++) + { + memset(pu1_dst_cb, *pu1_src_cb, u1_seg_wd); /* source pointer is not advanced between rows */ + pu1_dst_cb +=
i4_refarray_stride; + + memset(pu1_dst_cr, *pu1_src_cr, u1_seg_wd); + pu1_dst_cr += i4_refarray_stride; + } +} + +static PF_INTRA_SAMP_PADDING *gpf_lookup_fxns_luma[NUM_INTRA_SAMP_FXNS] = { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + &isvc_left_right_padding, + NULL, + &isvc_diag_reconstruction, + NULL, + &isvc_left_right_padding, + NULL, + &isvc_diag_reconstruction, + NULL, + NULL, + &isvc_top_bot_padding, + &isvc_diag_reconstruction, + NULL, + NULL, + &isvc_top_bot_padding, + &isvc_diag_reconstruction, + NULL, + &isvc_left_right_padding, + &isvc_top_bot_padding, + &isvc_diag_reconstruction, + &isvc_diag_padding, + &isvc_left_right_padding, + &isvc_top_bot_padding, + &isvc_diag_reconstruction, +}; + +static PF_INTRA_SAMP_PADDING *gpf_lookup_fxns_chroma[NUM_INTRA_SAMP_FXNS] = { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + &isvc_left_right_padding_chroma, + NULL, + &isvc_diag_reconstruction_chroma, + NULL, + &isvc_left_right_padding_chroma, + NULL, + &isvc_diag_reconstruction_chroma, + NULL, + NULL, + &isvc_top_bot_padding_chroma, + &isvc_diag_reconstruction_chroma, + NULL, + NULL, + &isvc_top_bot_padding_chroma, + &isvc_diag_reconstruction_chroma, + NULL, + &isvc_left_right_padding_chroma, + &isvc_top_bot_padding_chroma, + &isvc_diag_reconstruction_chroma, + &isvc_diag_padding_chroma, + &isvc_left_right_padding_chroma, + &isvc_top_bot_padding_chroma, + &isvc_diag_reconstruction_chroma, +}; + +static void isvc_get_ref_layer_avlblty_dyadic(WORD8 *pi1_ref_mb_modes, WORD32 i4_ref_mode_stride, + WORD32 i4_element_size, WORD32 i4_ref_mb_x, + WORD32 i4_ref_mb_y, WORD32 *pi4_avlblty, + WORD8 i1_curr_slice_id, WORD8 i1_cons_intr_samp_flag) +{ + WORD8 i1_mb_mode; + + pi1_ref_mb_modes += (i4_ref_mb_y * i4_ref_mode_stride * i4_element_size); + pi1_ref_mb_modes += (i4_ref_mb_x * i4_element_size); + i1_mb_mode = *pi1_ref_mb_modes; + i1_mb_mode = (i1_mb_mode < 0) ? 
i1_mb_mode : SVC_EXTRACT_MB_MODE(*pi1_ref_mb_modes); + + if(i1_mb_mode <= SVC_INTER_MB) + { + *pi4_avlblty = 0; + } + else + { + *pi4_avlblty = 1; + } + + if(1 == i1_cons_intr_samp_flag) + { + if(1 == *pi4_avlblty) + { + if(i1_mb_mode != i1_curr_slice_id) + { + *pi4_avlblty = 0; + } + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : isvc_diagonal_construct_dyadic */ +/* */ +/* Description : This function fills the unavailable pixels in the reference*/ +/* array with diagonally constructed samples */ +/* Inputs : i4_x :current position in reference array X to be filled */ +/* i4_y :current position in reference array Y to be filled */ +/* i4_xd_index : diagonal index in horizontal direction */ +/* i4_yd_index : diagonal index in vertical direction */ +/* pu1_refarray : pointer to reference array */ +/* i4_refarray_wd: width of the reference array */ +/* Globals : none */ +/* Processing : Fills the sample which is unavailable with filtered */ +/* diagonal samples */ +/* Outputs : pixel filled */ +/* Returns : constructed pixel */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 03 12 2010 Nithya creation */ +/* */ +/*****************************************************************************/ +static UWORD8 isvc_diagonal_construct_dyadic(WORD32 i4_x, WORD32 i4_y, WORD32 i4_xd_index, + WORD32 i4_yd_index, UWORD8 *pu1_refarray, + WORD32 i4_refarray_wd) +{ + WORD32 i4_diff_hor_ver, i4_sgn_xy; + WORD32 i4_xc, i4_yc; + WORD32 i4_samp1, i4_samp2, i4_samp3; + WORD32 i4_result; + UWORD8 *pu1_tmp; + + i4_diff_hor_ver = ABS(i4_xd_index) - ABS(i4_yd_index); + i4_sgn_xy = SIGN(i4_xd_index * i4_yd_index); + + if(i4_diff_hor_ver > 0) + { + i4_xc = i4_x - (i4_sgn_xy * i4_yd_index); + i4_yc = i4_y - i4_yd_index; + + pu1_tmp = pu1_refarray + (i4_yc * i4_refarray_wd); + + i4_samp1 = pu1_tmp[i4_xc - 1]; + i4_samp2 = pu1_tmp[i4_xc];
+ i4_samp3 = pu1_tmp[i4_xc + 1]; + } + else if(i4_diff_hor_ver < 0) + { + i4_xc = i4_x - i4_xd_index; + i4_yc = i4_y - (i4_sgn_xy * i4_xd_index); + + pu1_tmp = pu1_refarray + ((i4_yc - 1) * i4_refarray_wd); + + i4_samp1 = pu1_tmp[i4_xc]; + pu1_tmp += i4_refarray_wd; + i4_samp2 = pu1_tmp[i4_xc]; + pu1_tmp += i4_refarray_wd; + i4_samp3 = pu1_tmp[i4_xc]; + } + else + { + WORD32 i4_ref_xd, i4_ref_yd; + + i4_ref_xd = i4_x - i4_xd_index; + i4_ref_yd = i4_y - i4_yd_index; + + i4_xc = i4_ref_xd + SIGN(i4_xd_index); + i4_yc = i4_ref_yd + SIGN(i4_yd_index); + + pu1_tmp = pu1_refarray + (i4_ref_yd * i4_refarray_wd); + + i4_samp1 = pu1_tmp[i4_xc]; + i4_samp2 = pu1_tmp[i4_ref_xd]; + pu1_tmp = pu1_refarray + (i4_yc * i4_refarray_wd); + i4_samp3 = pu1_tmp[i4_ref_xd]; + } + + i4_result = (i4_samp1 + (i4_samp2 << 1) + i4_samp3 + 2) >> 2; + + pu1_tmp = pu1_refarray + (i4_y * i4_refarray_wd); + /* Store the filled sample */ + pu1_tmp[i4_x] = i4_result; + + return i4_result; +} + +/*****************************************************************************/ +/* */ +/* Function Name : isvc_corner_samp_dyadic */ +/* */ +/* Description : This function fills the corner sample in the reference */ +/* array with diagonally constructed samples */ +/* Inputs : i4_x :current position in reference array X to be filled */ +/* i4_y :current position in reference array Y to be filled */ +/* i4_xd_index : diagonal index in horizontal direction */ +/* i4_yd_index : diagonal index in vertical direction */ +/* pu1_refarray_y : pointer to luma reference array */ +/* pu1_refarray_cb : pointer to Cb reference array */ +/* pu1_refarray_cr : pointer to Cr reference array */ +/* Globals : none */ +/* Processing : Fills the sample which is unavailable with filtered */ +/* diagonal samples */ +/* Outputs : pixel filled */ +/* Returns : none */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 03 12 2010 Nithya creation */ 
+/* */ +/*****************************************************************************/ +static void isvc_corner_samp_dyadic(WORD32 i4_x, WORD32 i4_y, WORD32 i4_xD, WORD32 i4_yD, + UWORD8 *pu1_refarray_y, UWORD8 *pu1_refarray_cb, + UWORD8 *pu1_refarray_cr) +{ + WORD32 i4_ref_xD, i4_ref_yD; + WORD32 i4_c_ref_xD, i4_c_ref_yD; + WORD32 i4_xc, i4_yc; + WORD32 i4_c_xc, i4_c_yc; + WORD32 i4_samp1, i4_samp2; + UWORD8 *pu1_tmp_src, *pu1_tmp_dst; + + i4_ref_xD = i4_x - i4_xD; + i4_ref_yD = i4_y - i4_yD; + + i4_xc = i4_ref_xD + SIGN(i4_xD); + i4_yc = i4_ref_yD + SIGN(i4_yD); + + /* Luma */ + pu1_tmp_src = pu1_refarray_y + (i4_yc * DYADIC_REF_W_Y); + i4_samp1 = pu1_tmp_src[i4_ref_xD]; + pu1_tmp_src = pu1_refarray_y + (i4_ref_yD * DYADIC_REF_W_Y); + i4_samp2 = pu1_tmp_src[i4_xc]; + pu1_tmp_dst = pu1_tmp_src; + + pu1_tmp_dst[i4_ref_xD] = (i4_samp1 + i4_samp2 + 1) >> 1; + + /* Chroma */ + i4_c_ref_xD = i4_ref_xD >> 1; + i4_c_ref_yD = i4_ref_yD >> 1; + + i4_c_xc = i4_c_ref_xD + SIGN(i4_xD); + i4_c_yc = i4_c_ref_yD + SIGN(i4_yD); + + /* Cb */ + pu1_tmp_src = pu1_refarray_cb + (i4_c_yc * DYADIC_REF_W_C); + i4_samp1 = pu1_tmp_src[i4_c_ref_xD]; + pu1_tmp_src = pu1_refarray_cb + (i4_c_ref_yD * DYADIC_REF_W_C); + i4_samp2 = pu1_tmp_src[i4_c_xc]; + pu1_tmp_dst = pu1_tmp_src; + + pu1_tmp_dst[i4_c_ref_xD] = (i4_samp1 + i4_samp2 + 1) >> 1; + + /* Cr */ + pu1_tmp_src = pu1_refarray_cr + (i4_c_yc * DYADIC_REF_W_C); + i4_samp1 = pu1_tmp_src[i4_c_ref_xD]; + pu1_tmp_src = pu1_refarray_cr + (i4_c_ref_yD * DYADIC_REF_W_C); + i4_samp2 = pu1_tmp_src[i4_c_xc]; + pu1_tmp_dst = pu1_tmp_src; + + pu1_tmp_dst[i4_c_ref_xD] = (i4_samp1 + i4_samp2 + 1) >> 1; +} + +/*****************************************************************************/ +/* */ +/* Function Name : isvc_reflayer_construction_dyadic */ +/* */ +/* Description : This function constructs the reference array buffer */ +/* for dyadic cases used for intra resampling of a */ +/* component in an MB */ +/* */ +/* Inputs : pv_intra_samp_ctxt : 
intra sampling context */ +/* ps_ref_mb_mode_map : ref layer mb mode buffer desc */ +/* pu1_inp_luma : luma input (reference layer data) */ +/* pu1_inp_chroma : chroma input (reference layer data) */ +/* i4_inp_luma_stride : luma input buffer stride */ +/* i4_inp_chroma_stride : chroma input buffer stride */ +/* i4_top : indicates whether the core 8x8 reference block */ +/* is one of 0 and 1 or one of 2 and 3 */ +/* i4_left : indicates whether the core 8x8 reference block */ +/* is one of 0 and 2 or one of 1 and 3 */ +/* u2_mb_x, u2_mb_y : coordinates of the reference MB */ +/* Globals : none */ +/* Processing : it fills the reference layer data if they are falling in */ +/* INTRA MB region. If all the pixels are not filled it */ +/* calls the border extension algorithm to fill them */ +/* Outputs : none */ +/* Returns : none */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 02 12 2010 Nithya creation */ +/* */ +/*****************************************************************************/ +static void isvc_reflayer_construction_dyadic(void *pv_intra_samp_ctxt, + mem_element_t *ps_ref_mb_mode_map, + UWORD8 *pu1_inp_luma, UWORD8 *pu1_inp_chroma, + WORD32 i4_inp_luma_stride, + WORD32 i4_inp_chroma_stride, WORD32 i4_top, + WORD32 i4_left, UWORD16 u2_mb_x, UWORD16 u2_mb_y) +{ + enum + { + TOPLEFT_MASK = 1, + LEFT_MASK = 2, + TOP_MASK = 4, + TOPRIGHT_MASK = 8, + BOTTOMLEFT_MASK = 16 + }; + + WORD32 i4_x, i4_y; + WORD32 i4_x0, i4_y0; + WORD32 i4_xc0, i4_yc0; + WORD32 i4_ref_xD, i4_ref_yD; + WORD32 i4_c_ref_xD, i4_c_ref_yD; + + intra_sampling_ctxt_t *ps_ctxt; + intra_samp_lyr_ctxt *ps_lyr_ctxt; + WORD8 *pi1_ref_mb_modes; + WORD32 i4_ref_mode_stride; + WORD32 i4_element_size; + WORD32 i4_mbaddr_y; + WORD32 i4_mbaddr_x; + + WORD32 i4_refarray_wd_luma, i4_refarray_wd_chroma; + WORD32 i4_refarray_ht_luma, i4_refarray_ht_chroma; + WORD32 i4_avlblty; + WORD8 i1_cons_intr_samp_flag; + WORD8
i1_slice_id; + WORD8 i1_corner_samp_avlbl_flag; + UWORD8 u1_ny_avlblty; + + UWORD8 *pu1_refarray_luma; + UWORD8 *pu1_refarray_cb, *pu1_refarray_cr; + + ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt; + ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id]; + pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode_map->pv_buffer; + i4_ref_mode_stride = ps_ref_mb_mode_map->i4_num_element_stride; + i4_element_size = ps_ref_mb_mode_map->i4_element_size; + + i1_cons_intr_samp_flag = ps_lyr_ctxt->i1_constrained_intra_rsmpl_flag; + + ASSERT(NULL != pi1_ref_mb_modes); + + pu1_refarray_luma = ps_ctxt->pu1_refarray_buffer; + pu1_refarray_cb = ps_ctxt->pu1_refarray_cb; + pu1_refarray_cr = ps_ctxt->pu1_refarray_cr; + + i4_mbaddr_x = u2_mb_x; + i4_mbaddr_y = u2_mb_y; + + i4_refarray_wd_luma = 20; + i4_refarray_ht_luma = 20; + + i4_refarray_wd_chroma = i4_refarray_wd_luma >> 1; + i4_refarray_ht_chroma = i4_refarray_ht_luma >> 1; + + if(1 == i1_cons_intr_samp_flag) + { + WORD8 *pi1_ref_mb_mode_tmp; + WORD8 i1_mb_mode; + + pi1_ref_mb_mode_tmp = pi1_ref_mb_modes; + pi1_ref_mb_mode_tmp += (i4_mbaddr_y * i4_ref_mode_stride * i4_element_size); + pi1_ref_mb_mode_tmp += (i4_mbaddr_x * i4_element_size); + i1_mb_mode = *pi1_ref_mb_mode_tmp; + i1_mb_mode = (i1_mb_mode < 0) ? 
i1_mb_mode : SVC_EXTRACT_MB_MODE(*pi1_ref_mb_mode_tmp); + + /* The reference layer MB should be intra */ + ASSERT(i1_mb_mode >= 0); + + i1_slice_id = i1_mb_mode; + } + else + { + i1_slice_id = -1; + } + + { + UWORD8 *pu1_src, *pu1_dst; + WORD32 i4_src_stride, i4_dst_stride; + + /* Copy luma */ + i4_src_stride = i4_inp_luma_stride; + i4_dst_stride = DYADIC_REF_W_Y; + pu1_src = pu1_inp_luma; + pu1_dst = pu1_refarray_luma; + + isvc_copy_data(pu1_src, i4_src_stride, pu1_dst, i4_dst_stride, i4_refarray_wd_luma, + i4_refarray_ht_luma); + + i4_src_stride = i4_inp_chroma_stride; + i4_dst_stride = DYADIC_REF_W_C; + pu1_src = pu1_inp_chroma; + isvc_copy_data_semiplanr(pu1_src, i4_src_stride, pu1_refarray_cb, pu1_refarray_cr, + i4_dst_stride, i4_refarray_wd_chroma, i4_refarray_ht_chroma); + } + + { + /* mb_x + left, mb_y + top */ + isvc_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size, + i4_mbaddr_x + i4_left, i4_mbaddr_y + i4_top, &i4_avlblty, + i1_slice_id, i1_cons_intr_samp_flag); + u1_ny_avlblty = i4_avlblty; + + /* mb_x + left, mb_y */ + isvc_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size, + i4_mbaddr_x + i4_left, i4_mbaddr_y, &i4_avlblty, + i1_slice_id, i1_cons_intr_samp_flag); + u1_ny_avlblty += (i4_avlblty << 1); + + /* mb_x, mb_y + top */ + isvc_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size, + i4_mbaddr_x, i4_mbaddr_y + i4_top, &i4_avlblty, + i1_slice_id, i1_cons_intr_samp_flag); + u1_ny_avlblty += (i4_avlblty << 2); + + /* mb_x - left, mb_y + top */ + isvc_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size, + i4_mbaddr_x - i4_left, i4_mbaddr_y + i4_top, &i4_avlblty, + i1_slice_id, i1_cons_intr_samp_flag); + u1_ny_avlblty += (i4_avlblty << 3); + + /* mb_x + left, mb_y - top */ + isvc_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size, + i4_mbaddr_x + i4_left, i4_mbaddr_y - i4_top, &i4_avlblty, + 
i1_slice_id, i1_cons_intr_samp_flag); + u1_ny_avlblty += (i4_avlblty << 4); + } + + if((TOP_MASK | TOPLEFT_MASK | LEFT_MASK) == u1_ny_avlblty) + { + return; + } + + if(!(u1_ny_avlblty & (TOP_MASK | TOPLEFT_MASK | LEFT_MASK))) + { + UWORD8 *pu1_tmp_src, *pu1_tmp_dst1, *pu1_tmp_dst2; + UWORD8 *pu1_tmp_src1, *pu1_tmp_src2; + + /* Set the 4 corner samples to (x-xD,y-yD) */ + i4_x0 = 9 + (i4_left << 3) + i4_left; + i4_y0 = 9 + (i4_top << 3) + i4_top; + + i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1); + i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1); + + pu1_tmp_src = pu1_refarray_luma + (i4_ref_yD * DYADIC_REF_W_Y); + pu1_tmp_dst1 = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y); + pu1_tmp_dst2 = pu1_tmp_dst1 + DYADIC_REF_W_Y; + + pu1_tmp_dst1[i4_x0] = pu1_tmp_src[i4_ref_xD]; + pu1_tmp_dst1[i4_x0 + 1] = pu1_tmp_src[i4_ref_xD]; + pu1_tmp_dst2[i4_x0] = pu1_tmp_src[i4_ref_xD]; + pu1_tmp_dst2[i4_x0 + 1] = pu1_tmp_src[i4_ref_xD]; + + /* Set the corner sample of Cb and Cr to (x-xD,y-yD) */ + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + + i4_c_ref_yD = i4_ref_yD >> 1; + i4_c_ref_xD = i4_ref_xD >> 1; + + pu1_tmp_src1 = pu1_refarray_cb + (i4_c_ref_yD * DYADIC_REF_W_C); + pu1_tmp_dst1 = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C); + pu1_tmp_dst1[i4_xc0] = pu1_tmp_src1[i4_c_ref_xD]; + pu1_tmp_src2 = pu1_refarray_cr + (i4_c_ref_yD * DYADIC_REF_W_C); + pu1_tmp_dst2 = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C); + pu1_tmp_dst2[i4_xc0] = pu1_tmp_src2[i4_c_ref_xD]; + } + + if(!(u1_ny_avlblty & (TOP_MASK | TOPLEFT_MASK))) + { + UWORD8 *pu1_tmp_src, *pu1_tmp_dst1, *pu1_tmp_dst2; + UWORD8 *pu1_tmp_src1, *pu1_tmp_src2; + + /* Copy (x0,ref_yD), (x0+1,ref_yD), ..., (x0+7,ref_yD) to */ + /* (x0,y0), (x0+1,y0), ..., (x0+7,y0) and */ + /* (x0,y0+1), (x0+1,y0+1), ..., (x0+7,y0+1) */ + i4_x0 = 2; + i4_y0 = 9 + (i4_top << 3) + i4_top; + if(i4_left > 0) + { + i4_x0 += 8; + } + i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1); + + pu1_tmp_src = pu1_refarray_luma + (i4_ref_yD * DYADIC_REF_W_Y); + pu1_tmp_dst1 = 
pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y); + pu1_tmp_dst2 = pu1_tmp_dst1 + DYADIC_REF_W_Y; + + for(i4_x = i4_x0; i4_x < i4_x0 + 8; i4_x++) + { + pu1_tmp_dst1[i4_x] = pu1_tmp_src[i4_x]; + pu1_tmp_dst2[i4_x] = pu1_tmp_src[i4_x]; + } + + /* Cb and Cr copy */ + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + i4_c_ref_yD = i4_ref_yD >> 1; + + pu1_tmp_src1 = pu1_refarray_cb + (i4_c_ref_yD * DYADIC_REF_W_C); + pu1_tmp_dst1 = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C); + pu1_tmp_src2 = pu1_refarray_cr + (i4_c_ref_yD * DYADIC_REF_W_C); + pu1_tmp_dst2 = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C); + + for(i4_x = i4_xc0; i4_x < i4_xc0 + 4; i4_x++) + { + pu1_tmp_dst1[i4_x] = pu1_tmp_src1[i4_x]; + pu1_tmp_dst2[i4_x] = pu1_tmp_src2[i4_x]; + } + } + + if(!(u1_ny_avlblty & (TOPLEFT_MASK | LEFT_MASK))) + { + UWORD8 *pu1_tmp_src, *pu1_tmp_dst1, *pu1_tmp_dst2; + UWORD8 *pu1_tmp_src1, *pu1_tmp_src2; + + /* Copy (ref_xD,y0) to (x0,y0) and (x0+1,y0); */ + /* copy (ref_xD,y0+1) to (x0,y0+1) and (x0+1,y0+1); ... 
;*/ + /* copy (ref_xD,y0+7) to (x0,y0+7) and (x0+1,y0+7) */ + i4_x0 = 9 + (i4_left << 3) + i4_left; + i4_y0 = 2; + if(i4_top > 0) + { + i4_y0 += 8; + } + i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1); + + pu1_tmp_src = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y); + pu1_tmp_dst1 = pu1_tmp_src; + + for(i4_y = i4_y0; i4_y < i4_y0 + 8; i4_y++) + { + pu1_tmp_dst1[i4_x0] = pu1_tmp_src[i4_ref_xD]; + pu1_tmp_dst1[i4_x0 + 1] = pu1_tmp_src[i4_ref_xD]; + pu1_tmp_src += DYADIC_REF_W_Y; + pu1_tmp_dst1 += DYADIC_REF_W_Y; + } + + /* Cb and Cr copy */ + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + i4_c_ref_xD = i4_ref_xD >> 1; + + pu1_tmp_src1 = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C); + pu1_tmp_dst1 = pu1_tmp_src1; + pu1_tmp_src2 = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C); + pu1_tmp_dst2 = pu1_tmp_src2; + + for(i4_y = i4_yc0; i4_y < i4_yc0 + 4; i4_y++) + { + pu1_tmp_dst1[i4_xc0] = pu1_tmp_src1[i4_c_ref_xD]; + pu1_tmp_dst2[i4_xc0] = pu1_tmp_src2[i4_c_ref_xD]; + pu1_tmp_src1 += DYADIC_REF_W_C; + pu1_tmp_src2 += DYADIC_REF_W_C; + pu1_tmp_dst1 += DYADIC_REF_W_C; + pu1_tmp_dst2 += DYADIC_REF_W_C; + } + } + + if(!(u1_ny_avlblty & TOP_MASK)) + { + if(!(u1_ny_avlblty & TOPRIGHT_MASK)) + { + UWORD8 *pu1_tmp_src, *pu1_tmp_dst; + + i4_x0 = 9 - i4_left; + i4_y0 = 9 + (i4_top << 3) + i4_top; + + i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1); + + /* Copy (x0,ref_yD) and (x0+1,ref_yD) to (x0,y0) and (x0+1,y0), and */ + /* to (x0,y0+1) and (x0+1,y0+1) */ + pu1_tmp_src = pu1_refarray_luma + (i4_ref_yD * DYADIC_REF_W_Y); + pu1_tmp_dst = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y); + + pu1_tmp_dst[i4_x0] = pu1_tmp_src[i4_x0]; + pu1_tmp_dst[i4_x0 + 1] = pu1_tmp_src[i4_x0 + 1]; + + pu1_tmp_dst += DYADIC_REF_W_Y; + + pu1_tmp_dst[i4_x0] = pu1_tmp_src[i4_x0]; + pu1_tmp_dst[i4_x0 + 1] = pu1_tmp_src[i4_x0 + 1]; + + /* Cb copy */ + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + i4_c_ref_yD = i4_ref_yD >> 1; + + pu1_tmp_src = pu1_refarray_cb + (i4_c_ref_yD * DYADIC_REF_W_C); + pu1_tmp_dst = 
pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C); + + pu1_tmp_dst[i4_xc0] = pu1_tmp_src[i4_xc0]; + + /* Cr copy */ + pu1_tmp_src = pu1_refarray_cr + (i4_c_ref_yD * DYADIC_REF_W_C); + pu1_tmp_dst = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C); + + pu1_tmp_dst[i4_xc0] = pu1_tmp_src[i4_xc0]; + } + else + { + WORD32 i4_xD, i4_yD; + WORD32 i4_c_xD, i4_c_yD; + + isvc_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size, + i4_mbaddr_x - i4_left, i4_mbaddr_y, &i4_avlblty, + i1_slice_id, i1_cons_intr_samp_flag); + i1_corner_samp_avlbl_flag = i4_avlblty; + + i4_x0 = 9 - i4_left; + i4_y0 = 9 + (i4_top << 3) + i4_top; + + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + + i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1); + i4_ref_xD = i4_x0 - (i4_left * 7) - (i4_left >> 1); + + i4_c_ref_xD = i4_ref_xD >> 1; + i4_c_ref_yD = i4_ref_yD >> 1; + + i4_xD = i4_x0 - i4_ref_xD; + i4_yD = i4_y0 - i4_ref_yD; + + i4_c_xD = i4_xc0 - i4_c_ref_xD; + i4_c_yD = i4_yc0 - i4_c_ref_yD; + + /* Fill corner sample if not available */ + if(!i1_corner_samp_avlbl_flag) + { + isvc_corner_samp_dyadic(i4_x0, i4_y0, i4_xD, i4_yD, pu1_refarray_luma, + pu1_refarray_cb, pu1_refarray_cr); + } + + /* Call diagonal construction for luma */ + for(i4_y = i4_y0; i4_y < i4_y0 + 2; i4_y++) + { + for(i4_x = i4_x0; i4_x < i4_x0 + 2; i4_x++) + { + isvc_diagonal_construct_dyadic(i4_x, i4_y, i4_xD, i4_yD, pu1_refarray_luma, + DYADIC_REF_W_Y); + i4_xD++; + } + i4_yD++; + i4_xD -= 2; + } + + /* Call diagonal construction for chroma */ + isvc_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cb, + DYADIC_REF_W_C); + + isvc_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cr, + DYADIC_REF_W_C); + } + } + + if(!(u1_ny_avlblty & LEFT_MASK)) + { + if(!(u1_ny_avlblty & BOTTOMLEFT_MASK)) + { + UWORD8 *pu1_tmp_src, *pu1_tmp_dst; + + i4_x0 = 9 + (i4_left << 3) + i4_left; + i4_y0 = 9 - i4_top; + i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1); + + /* Copy (ref_xD,y0) to 
(x0,y0), (x0+1,y0), and */ + /* copy (ref_xD,y0+1) to (x0,y0+1), (x0+1,y0+1) */ + pu1_tmp_src = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y); + pu1_tmp_dst = pu1_tmp_src; + + pu1_tmp_dst[i4_x0] = pu1_tmp_src[i4_ref_xD]; + pu1_tmp_dst[i4_x0 + 1] = pu1_tmp_src[i4_ref_xD]; + + pu1_tmp_src += DYADIC_REF_W_Y; + pu1_tmp_dst += DYADIC_REF_W_Y; + + pu1_tmp_dst[i4_x0] = pu1_tmp_src[i4_ref_xD]; + pu1_tmp_dst[i4_x0 + 1] = pu1_tmp_src[i4_ref_xD]; + + /* Cb copy */ + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + i4_c_ref_xD = i4_ref_xD >> 1; + + pu1_tmp_src = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C); + pu1_tmp_dst = pu1_tmp_src; + + pu1_tmp_dst[i4_xc0] = pu1_tmp_src[i4_c_ref_xD]; + + /* Cr copy */ + pu1_tmp_src = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C); + pu1_tmp_dst = pu1_tmp_src; + + pu1_tmp_dst[i4_xc0] = pu1_tmp_src[i4_c_ref_xD]; + } + else + { + WORD32 i4_xD, i4_yD; + WORD32 i4_c_xD, i4_c_yD; + + isvc_get_ref_layer_avlblty_dyadic(pi1_ref_mb_modes, i4_ref_mode_stride, i4_element_size, + i4_mbaddr_x, i4_mbaddr_y - i4_top, &i4_avlblty, + i1_slice_id, i1_cons_intr_samp_flag); + i1_corner_samp_avlbl_flag = i4_avlblty; + + i4_x0 = 9 + (i4_left << 3) + i4_left; + i4_y0 = 9 - i4_top; + + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + + i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1); + i4_ref_yD = i4_y0 - (i4_top * 7) - (i4_top >> 1); + + i4_c_ref_xD = i4_ref_xD >> 1; + i4_c_ref_yD = i4_ref_yD >> 1; + + i4_xD = i4_x0 - i4_ref_xD; + i4_yD = i4_y0 - i4_ref_yD; + + i4_c_xD = i4_xc0 - i4_c_ref_xD; + i4_c_yD = i4_yc0 - i4_c_ref_yD; + + if(!i1_corner_samp_avlbl_flag) + { + isvc_corner_samp_dyadic(i4_x0, i4_y0, i4_xD, i4_yD, pu1_refarray_luma, + pu1_refarray_cb, pu1_refarray_cr); + } + + /* Call diagonal consrtuction for luma */ + for(i4_y = i4_y0; i4_y < i4_y0 + 2; i4_y++) + { + for(i4_x = i4_x0; i4_x < i4_x0 + 2; i4_x++) + { + isvc_diagonal_construct_dyadic(i4_x, i4_y, i4_xD, i4_yD, pu1_refarray_luma, + DYADIC_REF_W_Y); + i4_xD++; + } + i4_yD++; + i4_xD -= 2; + } + + /* Call diagonal 
construction for chroma */ + isvc_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cb, + DYADIC_REF_W_C); + + isvc_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cr, + DYADIC_REF_W_C); + } + } + + if(u1_ny_avlblty & TOPLEFT_MASK) + { + if(!(u1_ny_avlblty & LEFT_MASK)) + { + WORD32 i4_xD, i4_yD; + WORD32 i4_c_xD, i4_c_yD; + UWORD8 *pu1_tmp_dst; + UWORD8 u1_filled_samp; + + i1_corner_samp_avlbl_flag = (u1_ny_avlblty & 4) >> 2; + + i4_x0 = 9 + (i4_left << 3) + i4_left; + i4_y0 = 2; + i4_ref_yD = 1; + if(i4_top > 0) + { + i4_y0 += 8; + i4_ref_yD = 18; + } + + i4_ref_xD = i4_x0 - (i4_left) - (i4_left >> 1); + + i4_xD = i4_x0 - i4_ref_xD; + i4_yD = i4_y0 - i4_ref_yD; + + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + + i4_c_ref_xD = i4_ref_xD >> 1; + i4_c_ref_yD = i4_ref_yD >> 1; + + i4_c_xD = i4_xc0 - i4_c_ref_xD; + i4_c_yD = i4_yc0 - i4_c_ref_yD; + + /* Fill corner sample if unavailable */ + if(!i1_corner_samp_avlbl_flag) + { + isvc_corner_samp_dyadic(i4_x0, i4_y0, i4_xD, i4_yD, pu1_refarray_luma, + pu1_refarray_cb, pu1_refarray_cr); + } + + /* Call the diagonal construction for the 8 rows */ + if(i4_top == i4_left) + { + /* if top * left = 1 */ + /* (x0,y0) */ + u1_filled_samp = isvc_diagonal_construct_dyadic(i4_x0, i4_y0, i4_xD, i4_yD, + pu1_refarray_luma, DYADIC_REF_W_Y); + + pu1_tmp_dst = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y); + + /* (x0,y0+1), ..., (x0,y0+7) and */ + /* (x0+1,y0), ..., (x0+1,y0+6) */ + for(i4_y = i4_y0 + 1; i4_y < i4_y0 + 8; i4_y++) + { + i4_yD++; + u1_filled_samp = isvc_diagonal_construct_dyadic( + i4_x0, i4_y, i4_xD, i4_yD, pu1_refarray_luma, DYADIC_REF_W_Y); + pu1_tmp_dst[i4_x0 + 1] = u1_filled_samp; + pu1_tmp_dst += DYADIC_REF_W_Y; + } + + /* (x0+1,y0+7) */ + u1_filled_samp = isvc_diagonal_construct_dyadic( + i4_x0 + 1, i4_y0 + 7, i4_xD + 1, i4_yD, pu1_refarray_luma, DYADIC_REF_W_Y); + } + else + { + /* top * left = -1 */ + /* (x0+1,y0) */ + u1_filled_samp = 
isvc_diagonal_construct_dyadic(i4_x0 + 1, i4_y0, i4_xD + 1, i4_yD, + pu1_refarray_luma, DYADIC_REF_W_Y); + + pu1_tmp_dst = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y); + + /* (x0,y0), ..., (x0,y0+6) and */ + /* (x0+1,y0+1), ..., (x0+1,y0+7) */ + for(i4_y = i4_y0; i4_y < i4_y0 + 7; i4_y++) + { + u1_filled_samp = isvc_diagonal_construct_dyadic( + i4_x0, i4_y, i4_xD, i4_yD, pu1_refarray_luma, DYADIC_REF_W_Y); + + pu1_tmp_dst += DYADIC_REF_W_Y; + pu1_tmp_dst[i4_x0 + 1] = u1_filled_samp; + i4_yD++; + } + + /* (x0,y0+7) */ + u1_filled_samp = isvc_diagonal_construct_dyadic(i4_x0, i4_y0 + 7, i4_xD, i4_yD, + pu1_refarray_luma, DYADIC_REF_W_Y); + } + + /* For Cb and Cr */ + for(i4_y = i4_yc0; i4_y < i4_yc0 + 4; i4_y++) + { + u1_filled_samp = isvc_diagonal_construct_dyadic(i4_xc0, i4_y, i4_c_xD, i4_c_yD, + pu1_refarray_cb, DYADIC_REF_W_C); + u1_filled_samp = isvc_diagonal_construct_dyadic(i4_xc0, i4_y, i4_c_xD, i4_c_yD, + pu1_refarray_cr, DYADIC_REF_W_C); + i4_c_yD++; + } + } + + if(!(u1_ny_avlblty & TOP_MASK)) + { + WORD32 i4_xD, i4_yD; + WORD32 i4_c_xD, i4_c_yD; + UWORD8 *pu1_tmp_dst; + UWORD8 u1_filled_samp; + + i1_corner_samp_avlbl_flag = (u1_ny_avlblty & 2) >> 1; + + i4_y0 = 9 + (i4_top << 3) + (i4_top); + i4_x0 = 2; + i4_ref_xD = 1; + if(i4_left > 0) + { + i4_x0 += 8; + i4_ref_xD = 18; + } + + i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1); + + i4_xD = i4_x0 - i4_ref_xD; + i4_yD = i4_y0 - i4_ref_yD; + + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + + i4_c_ref_xD = i4_ref_xD >> 1; + i4_c_ref_yD = i4_ref_yD >> 1; + + i4_c_xD = i4_xc0 - i4_c_ref_xD; + i4_c_yD = i4_yc0 - i4_c_ref_yD; + + if(!i1_corner_samp_avlbl_flag) + { + isvc_corner_samp_dyadic(i4_x0, i4_y0, i4_xD, i4_yD, pu1_refarray_luma, + pu1_refarray_cb, pu1_refarray_cr); + } + + /* Call the diagonal construction for the 2 rows */ + if(i4_top == i4_left) + { + /* if top * left = 1 */ + /* (x0,y0) */ + u1_filled_samp = isvc_diagonal_construct_dyadic(i4_x0, i4_y0, i4_xD, i4_yD, + pu1_refarray_luma, DYADIC_REF_W_Y); + + 
pu1_tmp_dst = pu1_refarray_luma + ((i4_y0 + 1) * DYADIC_REF_W_Y); + + /* (x0+1,y0), ..., (x0+7,y0) and */ + /* (x0,y0+1), ..., (x0+6,y0+1) */ + for(i4_x = i4_x0 + 1; i4_x < i4_x0 + 8; i4_x++) + { + i4_xD++; + u1_filled_samp = isvc_diagonal_construct_dyadic( + i4_x, i4_y0, i4_xD, i4_yD, pu1_refarray_luma, DYADIC_REF_W_Y); + pu1_tmp_dst[i4_x - 1] = u1_filled_samp; + } + + /* (x0+7,y0+1) */ + u1_filled_samp = isvc_diagonal_construct_dyadic( + i4_x0 + 7, i4_y0 + 1, i4_xD, i4_yD + 1, pu1_refarray_luma, DYADIC_REF_W_Y); + } + else + { + /* top * left = -1 */ + /* (x0,y0+1) */ + u1_filled_samp = isvc_diagonal_construct_dyadic(i4_x0, i4_y0 + 1, i4_xD, i4_yD + 1, + pu1_refarray_luma, DYADIC_REF_W_Y); + + pu1_tmp_dst = pu1_refarray_luma + ((i4_y0 + 1) * DYADIC_REF_W_Y); + + /* (x0,y0), ..., (x0,y0+6) and */ + /* (x0+1,y0+1), ..., (x0+1,y0+7) */ + for(i4_x = i4_x0; i4_x < i4_x0 + 7; i4_x++) + { + u1_filled_samp = isvc_diagonal_construct_dyadic( + i4_x, i4_y0, i4_xD, i4_yD, pu1_refarray_luma, DYADIC_REF_W_Y); + + pu1_tmp_dst[i4_x + 1] = u1_filled_samp; + i4_xD++; + } + + /* (x0+7,y0) */ + u1_filled_samp = isvc_diagonal_construct_dyadic(i4_x0 + 7, i4_y0, i4_xD, i4_yD, + pu1_refarray_luma, DYADIC_REF_W_Y); + } + + /* For Cb and Cr */ + for(i4_x = i4_xc0; i4_x < i4_xc0 + 4; i4_x++) + { + u1_filled_samp = isvc_diagonal_construct_dyadic(i4_x, i4_yc0, i4_c_xD, i4_c_yD, + pu1_refarray_cb, DYADIC_REF_W_C); + u1_filled_samp = isvc_diagonal_construct_dyadic(i4_x, i4_yc0, i4_c_xD, i4_c_yD, + pu1_refarray_cr, DYADIC_REF_W_C); + i4_c_xD++; + } + } + } + + if(!(u1_ny_avlblty & TOPLEFT_MASK)) + { + UWORD8 *pu1_tmp_dst1, *pu1_tmp_dst2; + UWORD8 *pu1_tmp_src1, *pu1_tmp_src2; + + if(u1_ny_avlblty & LEFT_MASK) + { + /* (mb_x+left,mb_y) available, (mb_x,mb_y+top) unavailable */ + i4_x0 = 9 + (i4_left << 3) + i4_left; + i4_y0 = 9 + (i4_top << 3) + i4_top; + i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1); + + /* Copy (x0,ref_yD), (x0+1,ref_yD) to */ + /* (x0,y0), (x0+1,y0), and (x0,y0+1), (x0+1,y0+1) 
*/ + pu1_tmp_src1 = pu1_refarray_luma + (i4_ref_yD * DYADIC_REF_W_Y); + pu1_tmp_dst1 = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y); + pu1_tmp_dst2 = pu1_tmp_dst1 + DYADIC_REF_W_Y; + + pu1_tmp_dst1[i4_x0] = pu1_tmp_src1[i4_x0]; + pu1_tmp_dst2[i4_x0] = pu1_tmp_src1[i4_x0]; + pu1_tmp_dst1[i4_x0 + 1] = pu1_tmp_src1[i4_x0 + 1]; + pu1_tmp_dst2[i4_x0 + 1] = pu1_tmp_src1[i4_x0 + 1]; + + /* Cb and Cr copy */ + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + i4_c_ref_yD = i4_ref_yD >> 1; + + pu1_tmp_src1 = pu1_refarray_cb + (i4_c_ref_yD * DYADIC_REF_W_C); + pu1_tmp_dst1 = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C); + pu1_tmp_src2 = pu1_refarray_cr + (i4_c_ref_yD * DYADIC_REF_W_C); + pu1_tmp_dst2 = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C); + + pu1_tmp_dst1[i4_xc0] = pu1_tmp_src1[i4_xc0]; + pu1_tmp_dst2[i4_xc0] = pu1_tmp_src2[i4_xc0]; + } + else if(u1_ny_avlblty & TOP_MASK) + { + /* (mb_x+left,mb_y) unavailable, + (mb_x,mb_y+top) available */ + i4_x0 = 9 + (i4_left << 3) + i4_left; + i4_y0 = 9 + (i4_top << 3) + i4_top; + i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1); + + /* Copy (ref_xD,y0) to (x0,y0) and (x0+1,y0) */ + /* copy (ref_xD,y0+1) to (x0,y0+1) and (x0+1,y0+1) */ + pu1_tmp_src1 = pu1_refarray_luma + (i4_y0 * DYADIC_REF_W_Y); + pu1_tmp_dst1 = pu1_tmp_src1; + pu1_tmp_src2 = pu1_tmp_src1 + DYADIC_REF_W_Y; + pu1_tmp_dst2 = pu1_tmp_src2; + + pu1_tmp_dst1[i4_x0] = pu1_tmp_src1[i4_ref_xD]; + pu1_tmp_dst1[i4_x0 + 1] = pu1_tmp_src1[i4_ref_xD]; + pu1_tmp_dst2[i4_x0] = pu1_tmp_src2[i4_ref_xD]; + pu1_tmp_dst2[i4_x0 + 1] = pu1_tmp_src2[i4_ref_xD]; + + /* Copy Cb and Cr */ + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + i4_c_ref_xD = i4_ref_xD >> 1; + + pu1_tmp_src1 = pu1_refarray_cb + (i4_yc0 * DYADIC_REF_W_C); + pu1_tmp_dst1 = pu1_tmp_src1; + pu1_tmp_src2 = pu1_refarray_cr + (i4_yc0 * DYADIC_REF_W_C); + pu1_tmp_dst2 = pu1_tmp_src2; + + pu1_tmp_dst1[i4_xc0] = pu1_tmp_src1[i4_c_ref_xD]; + pu1_tmp_dst2[i4_xc0] = pu1_tmp_src2[i4_c_ref_xD]; + } + else if(u1_ny_avlblty & (TOP_MASK | 
LEFT_MASK)) + { + /* (mb_x+left,mb_y) available, + (mb_x,mb_y+top) available */ + WORD32 i4_xD, i4_yD; + WORD32 i4_c_xD, i4_c_yD; + + i4_y0 = 9 + (i4_top << 3) + i4_top; + i4_x0 = 9 + (i4_left << 3) + i4_left; + + i4_ref_xD = i4_x0 - i4_left - (i4_left >> 1); + i4_ref_yD = i4_y0 - i4_top - (i4_top >> 1); + + i4_xD = i4_x0 - i4_ref_xD; + i4_yD = i4_y0 - i4_ref_yD; + + i4_xc0 = i4_x0 >> 1; + i4_yc0 = i4_y0 >> 1; + + i4_c_ref_xD = i4_ref_xD >> 1; + i4_c_ref_yD = i4_ref_yD >> 1; + + i4_c_xD = i4_xc0 - i4_c_ref_xD; + i4_c_yD = i4_yc0 - i4_c_ref_yD; + + /* Call diagonal construction for luma */ + for(i4_y = i4_y0; i4_y < i4_y0 + 2; i4_y++) + { + for(i4_x = i4_x0; i4_x < i4_x0 + 2; i4_x++) + { + isvc_diagonal_construct_dyadic(i4_x, i4_y, i4_xD, i4_yD, pu1_refarray_luma, + DYADIC_REF_W_Y); + i4_xD++; + } + i4_yD++; + i4_xD -= 2; + } + + /* Call diagonal construction for chroma */ + isvc_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cb, + DYADIC_REF_W_C); + + isvc_diagonal_construct_dyadic(i4_xc0, i4_yc0, i4_c_xD, i4_c_yD, pu1_refarray_cr, + DYADIC_REF_W_C); + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : isvc_get_ref_layer_mbtype */ +/* */ +/* Description : This function is used to find the mb type of the */ +/* corresponding MB in the reference layer */ +/* */ +/* Inputs : pv_intra_samp_ctxt : intra samp context */ +/* pi1_ref_mb_modes : ref mb modes buffer pointer */ +/* i4_ref_mode_stride : mb mode buffer stride */ +/* i4_x_ref : reference location X */ +/* i4_y_ref : reference location Y */ +/* pi4_mb_type : pointer to store the mb type */ +/* i4_chroma_flag : chroma flag */ +/* Globals : none */ +/* Processing : it derives the bit corresponding to reference MB and */ +/* stores the mbtype as INTRA if the bit is set */ +/* Outputs : none */ +/* Returns : none */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe 
the changes made) */ +/* 26 06 2009 vijayakumar creation */ +/* */ +/*****************************************************************************/ +static WORD8 isvc_get_ref_layer_mbtype(WORD8 *pi1_ref_mb_modes, WORD32 *pi4_mb_type, + WORD8 i1_curr_slice_id, WORD8 i1_cons_intr_samp_flag) +{ + WORD8 i1_intra_slice_id; + WORD8 i1_mb_mode; + + i1_mb_mode = *pi1_ref_mb_modes; + i1_mb_mode = (i1_mb_mode < 0) ? i1_mb_mode : SVC_EXTRACT_MB_MODE(*pi1_ref_mb_modes); + + if(i1_mb_mode <= SVC_INTER_MB) + { + *pi4_mb_type = SVC_INTER_MB; + i1_intra_slice_id = -1; + } + else + { + *pi4_mb_type = SVC_INTRA_MB; + i1_intra_slice_id = i1_mb_mode; + + if(1 == i1_cons_intr_samp_flag) + { + if(i1_mb_mode != i1_curr_slice_id) + { + *pi4_mb_type = SVC_INTER_MB; + } + } + } + return i1_intra_slice_id; +} + +/*****************************************************************************/ +/* */ +/* Function Name : isvc_fill_non_ava_pixel */ +/* */ +/* Description : This function does the core pixel level processing */ +/* while filling the non available pixel */ +/* */ +/* Inputs : pv_intra_samp_ctxt : intra sampling context */ +/* i4_refarray_wd : width of the reference array */ +/* i4_refarray_ht : height of the reference array */ +/* ps_mb_coord : current mb coord structure */ +/* i4_chroma_flag : chroam processing flag */ +/* Globals : none */ +/* Processing : based on the map buffer values the non available pixels */ +/* are filled using border extension algorithm */ +/* Outputs : none */ +/* Returns : none */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 26 06 2009 vijayakumar creation */ +/* 07 03 2011 A.D.Almeida Optimized the filling pixels */ +/* */ +/*****************************************************************************/ +static void isvc_fill_non_avail_pixel(intra_samp_lyr_ctxt *ps_lyr_ctxt, UWORD8 *pu1_refarray_1, + UWORD8 *pu1_refarray_2, WORD32 i4_refarray_stride, + WORD32 
i4_chroma_flag, UWORD8 u1_avail_map[4][4]) +{ + WORD32 i4_x, i4_y; + WORD32 i4_corner_pixel_available; + + seg_lookup_desc_t *ps_segments_x; + seg_lookup_desc_t *ps_segments_y; + seg_description_t *ps_seg_desc_x, *ps_seg_desc_y; + seg_description_t *ps_seg_x_tmp, *ps_seg_y_tmp; + UWORD8 u1_num_sgmts_x, u1_num_sgmts_y; + + WORD32 i4_x_offset; + WORD32 i4_y_offset; + WORD32 i4_refmb_wd; + WORD32 i4_refmb_ht; + WORD32 i4_xr_index, i4_yr_index; + WORD32 i4_j, i4_i; + WORD32 i4_cur_x; + UWORD32 u4_lookup_4bit, u4_lookup_5bit, u4_4thbit; + WORD32 i4_pad_size; + + WORD32 i4_x_min; + WORD32 i4_y_min; + WORD32 i4_x_start_pos, i4_y_start_pos; + + UWORD8 *pu1_ref_idx_x, *pu1_ref_idx_y; + + PF_INTRA_SAMP_PADDING *pf_intra_samp_padding; + PF_INTRA_SAMP_PADDING **pf_intra_samp_lookup; + + i4_x_offset = ps_lyr_ctxt->ps_offsets->i4_abscissa; + i4_y_offset = ps_lyr_ctxt->ps_offsets->i4_ordinate; + + i4_refmb_wd = (MB_SIZE >> i4_chroma_flag) - 1; + i4_refmb_ht = (MB_SIZE >> i4_chroma_flag) - 1; + + if(0 == i4_chroma_flag) + { + pf_intra_samp_lookup = gpf_lookup_fxns_luma; + } + else + { + pf_intra_samp_lookup = gpf_lookup_fxns_chroma; + } + + i4_x_min = ps_lyr_ctxt->i2_x_min_pos; + i4_y_min = ps_lyr_ctxt->i2_y_min_pos; + + i4_pad_size = 2 >> i4_chroma_flag; + i4_x_start_pos = (i4_x_min - i4_pad_size); + i4_y_start_pos = (i4_y_min - i4_pad_size); + + i4_xr_index = (i4_x_start_pos + i4_x_offset) & i4_refmb_wd; + i4_yr_index = (i4_y_start_pos + i4_y_offset) & i4_refmb_ht; + + ps_segments_x = (ps_lyr_ctxt->as_seg_lookup_horz + i4_xr_index); + ps_segments_y = (ps_lyr_ctxt->as_seg_lookup_vert + i4_yr_index); + + u1_num_sgmts_x = ps_segments_x->u1_num_segments; + u1_num_sgmts_y = ps_segments_y->u1_num_segments; + + ps_seg_desc_x = ps_segments_x->s_segments; + ps_seg_desc_y = ps_segments_y->s_segments; + + pu1_ref_idx_x = &(ps_lyr_ctxt->au1_refarray_x_idx[0]); + pu1_ref_idx_y = &(ps_lyr_ctxt->au1_refarray_y_idx[0]); + + i4_cur_x = pu1_ref_idx_x[i4_x_start_pos]; + + u4_4thbit = 
ps_segments_x->u4_start_pos; + + for(i4_j = 0; i4_j < u1_num_sgmts_y; i4_j++) + { + UWORD8 i4_idx_a, i4_idx_b; + UWORD8 u1_seg_ht, u1_seg_wd; + UWORD8 u1_mb_adjoin_x, u1_mb_adjoin_y; + WORD8 i1_nearst_mb_bdry_x, i1_nearst_mb_bdry_y; + UWORD32 u4_num_valid_segs; + WORD32 i4_idx_a_plus_ny, i4_idx_b_plus_nx, i4_index; + WORD8 i1_yd_index, i1_xd_index; + + ps_seg_y_tmp = &ps_seg_desc_y[i4_j]; + + i4_y = i4_y_start_pos + ps_seg_y_tmp->u1_seg_off; + u1_seg_ht = ps_seg_y_tmp->u1_seg_dim; + i1_yd_index = ps_seg_y_tmp->i1_dist_idx; + i1_nearst_mb_bdry_y = ps_seg_y_tmp->i1_nearst_mb_bdry; + u1_mb_adjoin_y = ps_seg_y_tmp->u1_mb_adjoin; + + i4_idx_a = pu1_ref_idx_y[i4_y]; + + i4_idx_a_plus_ny = (i4_idx_a + i1_nearst_mb_bdry_y); + + /* Pack the availabilities of the next three horizontal MBs in 3bit + format and 4th bit indicating if the start position is greater + than the mb_width/2 + */ + u4_lookup_4bit = u4_4thbit | u1_avail_map[i4_idx_a][i4_cur_x + 2] << 2 | + u1_avail_map[i4_idx_a][i4_cur_x + 1] << 1 | + u1_avail_map[i4_idx_a][i4_cur_x]; + + u4_num_valid_segs = gu4_valid_segs_lookup[u4_lookup_4bit]; + + i4_i = CLZ(~u4_num_valid_segs); + u4_num_valid_segs <<= (i4_i + 1); + + for(; i4_i < u1_num_sgmts_x; i4_i++) + { + ps_seg_x_tmp = &ps_seg_desc_x[i4_i]; + + i4_x = i4_x_start_pos + ps_seg_x_tmp->u1_seg_off; + i4_idx_b = pu1_ref_idx_x[i4_x]; + + u1_seg_wd = ps_seg_x_tmp->u1_seg_dim; + i1_xd_index = ps_seg_x_tmp->i1_dist_idx; + i1_nearst_mb_bdry_x = ps_seg_x_tmp->i1_nearst_mb_bdry; + u1_mb_adjoin_x = ps_seg_x_tmp->u1_mb_adjoin; + + i4_idx_b_plus_nx = (i4_idx_b + i1_nearst_mb_bdry_x); + + /* Find the avalability of (x,y-Yd),(x-Xd,y),(x-Xd,y-Yd) and pack + it to 3 bits. 
+ */ + u4_lookup_5bit = u1_avail_map[i4_idx_a_plus_ny][i4_idx_b_plus_nx] << 2 | + u1_avail_map[i4_idx_a_plus_ny][i4_idx_b] << 1 | + u1_avail_map[i4_idx_a][i4_idx_b_plus_nx] | u1_mb_adjoin_x | + u1_mb_adjoin_y; + + i4_corner_pixel_available = u1_avail_map[i4_idx_a_plus_ny][i4_idx_b_plus_nx]; + + /* Use a function pointer table based on lookup to compute + Left,Top,Bottom,Right,Diagonal padding. + */ + pf_intra_samp_padding = pf_intra_samp_lookup[u4_lookup_5bit]; + + if(pf_intra_samp_padding != NULL) + { + pf_intra_samp_padding(i4_x, i4_y, i1_xd_index, i1_yd_index, u1_seg_wd, u1_seg_ht, + pu1_refarray_1, pu1_refarray_2, i4_refarray_stride, + u1_mb_adjoin_x, u1_mb_adjoin_y, i4_corner_pixel_available); + } + + /* increment to the next unavailable segment */ + i4_index = CLZ(~u4_num_valid_segs); + u4_num_valid_segs <<= (i4_index + 1); + i4_i += i4_index; + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : isvc_intra_resamp_generate_segment_lookup */ +/* */ +/* Description : This function generates segment lookup used to derive */ +/* segments which have to be be intra resampled */ +/* */ +/* Inputs : pv_lookup_table : look up table */ +/* i4_dimension : dimension of the block which is used in*/ +/* resampling process. 
*/ +/* i4_mb_size : size of the mb */ +/* Globals : None */ +/* Processing : This function generates segment lookup used to derive */ +/* segments which have to be be intra resampled */ +/* Outputs : none */ +/* Returns : none */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 03 03 2011 A.D.Almeida Creation */ +/* */ +void isvc_intra_resamp_generate_segment_lookup(seg_lookup_desc_t *ps_seg_lookup_table, + WORD32 i4_dimension, WORD32 i4_mb_size, + WORD32 i4_shift_val) +{ + WORD32 i4_x; + WORD32 i4_position, i4_dist_prev_mb, i4_dist_next_mb; + UWORD8 u1_seg_dim; + UWORD8 u1_num_sgmts; + WORD32 i4_block_size = i4_mb_size >> 1; + UWORD8 u1_offset = 0; + seg_lookup_desc_t *ps_segments; + seg_description_t *ps_seg_desc; + + memset(ps_seg_lookup_table, 0, i4_mb_size * sizeof(seg_lookup_desc_t)); + + for(i4_x = 0; i4_x < i4_mb_size; i4_x++) + { + ps_segments = &ps_seg_lookup_table[i4_x]; + ps_seg_desc = ps_segments->s_segments; + i4_position = i4_x; + + if(i4_x >= i4_block_size) + { + /* set the fourth bit so that later it can be directly OR ed */ + ps_segments->u4_start_pos = 8; + } + else + { + ps_segments->u4_start_pos = 0; + } + + u1_num_sgmts = 0; + u1_offset = 0; + + while(i4_position < (i4_x + i4_dimension)) + { + /* check and fill the nearest mb boundry flag */ + if((i4_position & (i4_mb_size - 1)) < i4_block_size) + { + ps_seg_desc->i1_nearst_mb_bdry = -1; + } + else + { + ps_seg_desc->i1_nearst_mb_bdry = 1; + } + + /* find the distance from the previous MB for start of segment*/ + i4_dist_prev_mb = (i4_position & (i4_mb_size - 1)); + + ps_seg_desc->i1_dist_idx = + ((i4_dist_prev_mb >= i4_mb_size >> 1) ? 
(i4_mb_size - i4_dist_prev_mb) + : -(i4_dist_prev_mb + 1)); + + /* find the size of the segment */ + u1_seg_dim = (i4_block_size - (i4_position & (i4_block_size - 1))); + i4_position += u1_seg_dim; + if(i4_position > (i4_x + i4_dimension)) + { + i4_position = (i4_x + i4_dimension); + u1_seg_dim = (i4_position & (i4_block_size - 1)); + } + + /* find the distance from the next MB for end of segment */ + i4_dist_next_mb = (i4_position & (i4_mb_size - 1)); + + ps_seg_desc->u1_seg_dim = u1_seg_dim; + ps_seg_desc->u1_seg_off = u1_offset; + + /* check if the segment has a adjoining MB edge */ + if(i4_dist_prev_mb == 0) + { + if(0 == u1_num_sgmts) + { + ps_seg_desc->u1_mb_adjoin = 0; + } + else + { + ps_seg_desc->u1_mb_adjoin = 1 << i4_shift_val; + } + } + else if(i4_dist_next_mb == 0) + { + if(i4_position == (i4_x + i4_dimension)) + { + ps_seg_desc->u1_mb_adjoin = 0; + } + else + { + ps_seg_desc->u1_mb_adjoin = 1 << i4_shift_val; + } + } + else + { + ps_seg_desc->u1_mb_adjoin = 0; + } + + u1_offset += u1_seg_dim; + u1_num_sgmts++; + ps_seg_desc++; + } + + /* fill the number of segments for this position */ + ps_segments->u1_num_segments = u1_num_sgmts; + } +} + +static void isvc_reflayer_construction(void *pv_intra_samp_ctxt, UWORD8 *pu1_inp_1, + WORD32 i4_inp_stride, WORD32 i4_refarray_stride, + mem_element_t *ps_ref_mb_mode_map, WORD32 i4_chroma_flag) +{ + WORD32 i4_x, i4_y; + + intra_sampling_ctxt_t *ps_ctxt; + intra_samp_lyr_ctxt *ps_lyr_ctxt; + WORD8 *pi1_ref_mb_modes, *pi1_ref_mb_modes_bkp_1; + WORD32 i4_ref_mode_stride; + WORD32 i4_element_size; + WORD32 i4_dummy; + WORD32 i4_mb_ht, i4_mb_wd; + + /* 4x4 mb grid buffer to store the mb availablity */ + UWORD8 u1_map_buf[BLK_SIZE][BLK_SIZE]; + WORD32 i4_ref_wd; + WORD32 i4_ref_ht; + WORD32 i4_x_offset; + WORD32 i4_y_offset; + WORD32 i4_refarray_wd; + WORD32 i4_refarray_ht; + WORD32 i4_mb_type; + WORD8 i1_cons_intr_samp_flag; + WORD8 i1_slice_id = 0; + WORD32 i4_mb_wd_sft, i4_mb_ht_sft; + + WORD32 i4_unfill_check; + 
UWORD8 *pu1_refarray_1, *pu1_refarray_2; + + UNUSED(i4_dummy); + memset(&u1_map_buf[0][0], 0, 16); + + ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt; + ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id]; + pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode_map->pv_buffer; + i4_ref_mode_stride = ps_ref_mb_mode_map->i4_num_element_stride; + i4_element_size = ps_ref_mb_mode_map->i4_element_size; + + /* get the condtrained intra sampling flag */ + i1_cons_intr_samp_flag = ps_lyr_ctxt->i1_constrained_intra_rsmpl_flag; + + ASSERT(NULL != pi1_ref_mb_modes); + + { + WORD32 i4_base_width = ps_lyr_ctxt->i4_ref_width; + WORD32 i4_base_height = ps_lyr_ctxt->i4_ref_height; + + i4_ref_wd = i4_base_width >> i4_chroma_flag; + i4_ref_ht = i4_base_height >> i4_chroma_flag; + + i4_mb_wd_sft = (MB_WIDTH_SHIFT - i4_chroma_flag); + i4_mb_ht_sft = (MB_HEIGHT_SHIFT - i4_chroma_flag); + } + + i4_x_offset = ps_lyr_ctxt->ps_offsets->i4_abscissa; + i4_y_offset = ps_lyr_ctxt->ps_offsets->i4_ordinate; + i4_refarray_wd = ps_lyr_ctxt->ps_ref_array_dims->i4_abscissa; + i4_refarray_ht = ps_lyr_ctxt->ps_ref_array_dims->i4_ordinate; + + i4_mb_wd = (MB_SIZE >> i4_chroma_flag); + i4_mb_ht = (MB_SIZE >> i4_chroma_flag); + + if(1 == i1_cons_intr_samp_flag) + { + WORD32 i4_x_min, i4_x_max; + WORD32 i4_y_min, i4_y_max; + + i4_x_min = ps_lyr_ctxt->i2_x_min_pos; + i4_x_max = ps_lyr_ctxt->i2_x_max_pos; + i4_y_min = ps_lyr_ctxt->i2_y_min_pos; + i4_y_max = ps_lyr_ctxt->i2_y_max_pos; + + i4_mb_type = SVC_INTER_MB; + { + WORD32 i4_x_ref; + WORD32 i4_y_ref; + WORD32 i4_mb_x, i4_mb_y; + + /* derive local varaibles */ + i4_y_ref = (i4_y_min + 1) + i4_y_offset; + i4_x_ref = (i4_x_min + 1) + i4_x_offset; + + i4_mb_x = (i4_x_ref >> i4_mb_wd_sft); + i4_mb_y = (i4_y_ref >> i4_mb_ht_sft); + + pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode_map->pv_buffer; + + /* get the location of the byte which has the current mb mode */ + pi1_ref_mb_modes += (i4_mb_y * i4_ref_mode_stride * i4_element_size); + pi1_ref_mb_modes += 
(i4_mb_x * i4_element_size); + } + + for(i4_y = (i4_y_min + 1); i4_y <= (i4_y_max - 1);) + { + WORD32 i4_x_ref; + WORD32 i4_y_ref; + WORD32 i4_distleftX, i4_rangeX; + WORD32 i4_disttopY, i4_rangeY; + + i4_y_ref = (i4_y + i4_y_offset); + i4_disttopY = (i4_y_ref) & (i4_mb_ht - 1); + i4_rangeY = (i4_mb_ht - i4_disttopY); + + pi1_ref_mb_modes_bkp_1 = pi1_ref_mb_modes; + + for(i4_x = (i4_x_min + 1); i4_x <= (i4_x_max - 1);) + { + i4_x_ref = (i4_x + i4_x_offset); + i4_distleftX = (i4_x_ref) & (i4_mb_wd - 1); + i4_rangeX = (i4_mb_wd - i4_distleftX); + + /* get the referecne layer mb type */ + i1_slice_id = + isvc_get_ref_layer_mbtype(pi1_ref_mb_modes_bkp_1, &i4_mb_type, i1_slice_id, 0); + if(SVC_INTRA_MB == i4_mb_type) + { + break; + } + i4_x += i4_rangeX; + pi1_ref_mb_modes_bkp_1 += i4_element_size; + } + + if(SVC_INTRA_MB == i4_mb_type) + { + break; + } + + i4_y += i4_rangeY; + pi1_ref_mb_modes += (i4_ref_mode_stride * i4_element_size); + } + } + else + { + i1_slice_id = -1; + } + + i4_unfill_check = 0; + + /* --------------------------------------------------------------------- */ + /* Copying the data from recon buffer to refSample Array. 
+ */ + /* NOTE: The copying of the data from recon buffer to refSample Array */ + /* can be optimized by bring in data at N-MB level,thus taking */ + /* advantage of the overlapping data which now gets copied every + * MB*/ + /* --------------------------------------------------------------------- */ + { + WORD32 i4_x_ref_start, i4_x_ref_end; + WORD32 i4_y_ref_start, i4_y_ref_end; + WORD32 i4_rangeW, i4_rangeH; + WORD32 i4_offset; + UWORD8 *pu1_src, *pu1_dst; + UWORD8 *pu1_dst1, *pu1_dst2; + + /* Copy (refW x refH) dimension into reference sample array */ + i4_x_ref_start = MAX(0, MIN((i4_ref_wd - 1), i4_x_offset)); + i4_x_ref_end = MAX(0, MIN((i4_ref_wd - 1), (i4_refarray_wd - 1) + i4_x_offset)); + i4_y_ref_start = MAX(0, MIN((i4_ref_ht - 1), i4_y_offset)); + i4_y_ref_end = MAX(0, MIN((i4_ref_ht - 1), (i4_refarray_ht - 1) + i4_y_offset)); + + /* find the actual data to be copied */ + i4_rangeW = (i4_x_ref_end - i4_x_ref_start + 1); + i4_rangeH = (i4_y_ref_end - i4_y_ref_start + 1); + + /* get the reconbuffer pointer and ref sample array pointer */ + i4_offset = + (i4_x_ref_start - i4_x_offset) + ((i4_y_ref_start - i4_y_offset) * i4_refarray_stride); + + if(0 == i4_chroma_flag) + { + pu1_refarray_1 = ps_ctxt->pu1_refarray_buffer; + pu1_refarray_2 = NULL; + + pu1_src = pu1_inp_1; + pu1_dst = pu1_refarray_1 + i4_offset; + + /* Copy luma data into refsample array */ + isvc_copy_data(pu1_src, i4_inp_stride, pu1_dst, i4_refarray_stride, i4_rangeW, + i4_rangeH); + } + else + { + pu1_refarray_1 = ps_ctxt->pu1_refarray_buffer; + pu1_refarray_2 = ps_ctxt->pu1_refarray_cb; + + pu1_src = pu1_inp_1; + pu1_dst1 = pu1_refarray_1 + i4_offset; + + pu1_dst2 = pu1_refarray_2 + i4_offset; + + isvc_copy_data_semiplanr(pu1_src, i4_inp_stride, pu1_dst1, pu1_dst2, i4_refarray_stride, + i4_rangeW, i4_rangeH); + } + } + { + WORD32 i4_i, i4_j; + UWORD8 *pu1_ref_idx_x, *pu1_ref_idx_y; + + WORD32 i4_x_ref; + WORD32 i4_y_ref; + WORD32 i4_mb_x, i4_mb_y; + + i4_y_ref = i4_y_offset; + i4_x_ref = 
i4_x_offset; + + i4_mb_x = (i4_x_ref >> i4_mb_wd_sft); + i4_mb_y = (i4_y_ref >> i4_mb_ht_sft); + + pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode_map->pv_buffer; + + pi1_ref_mb_modes += (i4_mb_y * i4_ref_mode_stride * i4_element_size); + pi1_ref_mb_modes += (i4_mb_x * i4_element_size); + + pu1_ref_idx_x = &(ps_lyr_ctxt->au1_refarray_x_idx[0]); + pu1_ref_idx_y = &(ps_lyr_ctxt->au1_refarray_y_idx[0]); + + i4_j = 0; + for(i4_y = 0; i4_y < i4_refarray_ht;) + { + WORD32 i4_x_ref; + WORD32 i4_y_ref; + WORD32 i4_distleftX, i4_rangeX; + WORD32 i4_disttopY, i4_rangeY; + + i4_y_ref = i4_y + i4_y_offset; + i4_disttopY = (i4_y_ref) & (i4_mb_ht - 1); + i4_rangeY = (i4_mb_ht - i4_disttopY); + + memset(pu1_ref_idx_y, i4_j, i4_rangeY); + pu1_ref_idx_y += i4_rangeY; + + i4_i = 0; + pi1_ref_mb_modes_bkp_1 = pi1_ref_mb_modes; + for(i4_x = 0; i4_x < i4_refarray_wd;) + { + i4_x_ref = i4_x + i4_x_offset; + i4_distleftX = (i4_x_ref) & (i4_mb_wd - 1); + i4_rangeX = (i4_mb_wd - i4_distleftX); + + if(0 == i4_j) + { + memset(pu1_ref_idx_x, i4_i, i4_rangeX); + pu1_ref_idx_x += i4_rangeX; + } + + isvc_get_ref_layer_mbtype(pi1_ref_mb_modes_bkp_1, &i4_mb_type, i1_slice_id, + i1_cons_intr_samp_flag); + + if(SVC_INTRA_MB == i4_mb_type) + { + u1_map_buf[i4_j][i4_i] = 1; + i4_dummy = 1; + } + else + { + i4_unfill_check = 1; + } + + i4_x = i4_x + i4_rangeX; + i4_i++; + pi1_ref_mb_modes_bkp_1 += i4_element_size; + } + i4_j++; + i4_y = i4_y + i4_rangeY; + pi1_ref_mb_modes += (i4_ref_mode_stride * i4_element_size); + } + ASSERT(1 == i4_dummy); + } + + if(i4_unfill_check == 1) + { + isvc_fill_non_avail_pixel(ps_lyr_ctxt, pu1_refarray_1, pu1_refarray_2, i4_refarray_stride, + i4_chroma_flag, u1_map_buf); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : isvc_interpolate_base_luma_dyadic */ +/* */ +/* Description : This function takes the reference array buffer & performs*/ +/* intra resampling for dyadic scaling ratios */ +/* Inputs : 
pu1_inp_buf : ptr to the 12x12 reference sample buffer */
+/*                  pi2_tmp_filt_buf : ptr to the 12x16 buffer to hold the   */
+/*                                     vertically interpolated data          */
+/*                  pu1_out_buf : output buffer pointer                      */
+/*                  i4_out_stride : output buffer stride                     */
+/*  Globals       : none                                                     */
+/*  Processing    : it does the interpolation in vertical direction followed */
+/*                  by horizontal direction. The 4-tap filter (-3, 28, 8, -1)
+ *                  is used for "phase 4" and the same taps in reverse order
+ *                  for "phase 12". The taps sum to 32, so the two passes are
+ *                  normalised together with (x + 512) >> 10 and the result
+ *                  is clipped with CLIPUCHAR. Input rows are DYADIC_REF_W_Y
+ *                  samples apart; 12 rows x 12 columns are read and 16x16
+ *                  samples are written.                                      */
+/*  Outputs       : resampled pixels                                         */
+/*  Returns       : none                                                     */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         03 12 2010   Nithya          creation                             */
+/*                                                                           */
+/*****************************************************************************/
+void isvc_interpolate_base_luma_dyadic(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
+                                       UWORD8 *pu1_out_buf, WORD32 i4_out_stride)
+{
+    WORD32 i4_x, i4_y;
+    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
+    WORD32 i4_samp_0, i4_samp_1, i4_samp_2, i4_samp_3;
+    WORD32 i4_rslt_1, i4_rslt_2;
+    WORD32 i4_filt_stride, i4_src_stride;
+    UWORD8 *pu1_inp, *pu1_out;
+    WORD16 *pi2_tmp;
+
+    /* Filter coefficient values for phase 4 (applied reversed for phase 12) */
+    i4_coeff_0 = -3;
+    i4_coeff_1 = 28;
+    i4_coeff_2 = 8;
+    i4_coeff_3 = -1;
+
+    i4_filt_stride = 12; /* row stride of pi2_tmp_filt_buf, in elements */
+    i4_src_stride = DYADIC_REF_W_Y;
+
+    pu1_inp = pu1_inp_buf;
+    pi2_tmp = pi2_tmp_filt_buf;
+    pu1_out = pu1_out_buf;
+
+    /* Vertical interpolation: one column at a time, 12 input rows -> 16 rows */
+    for(i4_x = 0; i4_x < 12; i4_x++)
+    {
+        /* y = 0, y_phase = 12 */
+        i4_samp_0 = pu1_inp[i4_x];
+        pu1_inp += i4_src_stride;
+        i4_samp_1 = pu1_inp[i4_x];
+        pu1_inp += i4_src_stride;
+        i4_samp_2 = pu1_inp[i4_x];
+        pu1_inp += i4_src_stride;
+        i4_samp_3 = pu1_inp[i4_x];
+        pu1_inp += i4_src_stride;
+
+        /* since y_phase 12 for y = 0 */
+        i4_rslt_1 = i4_samp_0 * i4_coeff_3;
+        i4_rslt_1 += i4_samp_1 * i4_coeff_2;
+        i4_rslt_1 += i4_samp_2 * i4_coeff_1;
+        i4_rslt_1 += i4_samp_3 * i4_coeff_0;
+
+        pi2_tmp[i4_x] = i4_rslt_1;
+        pi2_tmp += i4_filt_stride;
+
+        /* each iteration slides the 4-sample window down by one input row */
+        /* and produces two output rows (y and y + 1)                       */
+        for(i4_y = 1; i4_y < 15; i4_y += 2)
+        {
+            i4_samp_0 = i4_samp_1;
+            i4_samp_1 = i4_samp_2;
+            i4_samp_2 = i4_samp_3;
+            i4_samp_3 = pu1_inp[i4_x];
+
+            /* y_phase is 4 for odd values of y */
+            /* and 12 for even values of y    */
+            i4_rslt_1 = i4_samp_0 * i4_coeff_0;
+            i4_rslt_1 += i4_samp_1 * i4_coeff_1;
+            i4_rslt_1 += i4_samp_2 * i4_coeff_2;
+            i4_rslt_1 += i4_samp_3 * i4_coeff_3;
+
+            i4_rslt_2 = i4_samp_0 * i4_coeff_3;
+            i4_rslt_2 += i4_samp_1 * i4_coeff_2;
+            i4_rslt_2 += i4_samp_2 * i4_coeff_1;
+            i4_rslt_2 += i4_samp_3 * i4_coeff_0;
+
+            /* Storing the results */
+            pi2_tmp[i4_x] = i4_rslt_1;
+            pi2_tmp += i4_filt_stride;
+            pi2_tmp[i4_x] = i4_rslt_2;
+
+            /* Incrementing the pointers */
+            pi2_tmp += i4_filt_stride;
+            pu1_inp += i4_src_stride;
+        }
+
+        /* y = 15, y_phase = 4 */
+        i4_samp_0 = i4_samp_1;
+        i4_samp_1 = i4_samp_2;
+        i4_samp_2 = i4_samp_3;
+        i4_samp_3 = pu1_inp[i4_x];
+
+        i4_rslt_1 = i4_samp_0 * i4_coeff_0;
+        i4_rslt_1 += i4_samp_1 * i4_coeff_1;
+        i4_rslt_1 += i4_samp_2 * i4_coeff_2;
+        i4_rslt_1 += i4_samp_3 * i4_coeff_3;
+
+        pi2_tmp[i4_x] = i4_rslt_1;
+        pu1_inp = pu1_inp_buf;      /* rewind both pointers to the top row */
+        pi2_tmp = pi2_tmp_filt_buf; /* for the next column                 */
+    }
+
+    /* Horizontal interpolation: each row consumes 12 intermediate samples */
+    for(i4_y = 0; i4_y < 16; i4_y++)
+    {
+        /* x = 0, x_phase = 12 */
+        i4_samp_0 = *pi2_tmp++;
+        i4_samp_1 = *pi2_tmp++;
+        i4_samp_2 = *pi2_tmp++;
+        i4_samp_3 = *pi2_tmp++;
+
+        /* since x_phase 12 for x = 0 */
+        i4_rslt_1 = i4_samp_0 * i4_coeff_3;
+        i4_rslt_1 += i4_samp_1 * i4_coeff_2;
+        i4_rslt_1 += i4_samp_2 * i4_coeff_1;
+        i4_rslt_1 += i4_samp_3 * i4_coeff_0;
+        i4_rslt_1 += 512; /* rounding offset for the >> 10 below */
+
+        i4_rslt_1 >>= 10;
+        pu1_out[0] = CLIPUCHAR(i4_rslt_1);
+
+        for(i4_x = 1; i4_x < 15; i4_x += 2)
+        {
+            i4_samp_0 = i4_samp_1;
+            i4_samp_1 = i4_samp_2;
+            i4_samp_2 = i4_samp_3;
+            i4_samp_3 = *pi2_tmp++;
+
+            /* x_phase is 4 for odd values of x */
+            /* and 12 for even values of x    */
+            i4_rslt_1 = i4_samp_0 * i4_coeff_0;
+            i4_rslt_1 += i4_samp_1 * i4_coeff_1;
+            i4_rslt_1 += i4_samp_2 * i4_coeff_2;
+            i4_rslt_1 += i4_samp_3 * i4_coeff_3;
+            i4_rslt_1 += 512;
+
+            i4_rslt_2 = i4_samp_0 * i4_coeff_3;
+            i4_rslt_2 += i4_samp_1 * i4_coeff_2;
+            i4_rslt_2 += i4_samp_2 * i4_coeff_1;
+            i4_rslt_2 += i4_samp_3 * i4_coeff_0;
+            i4_rslt_2 += 512;
+
+            i4_rslt_1 >>= 10;
+            i4_rslt_2 >>= 10;
+
+            pu1_out[i4_x] = CLIPUCHAR(i4_rslt_1);
+            pu1_out[i4_x + 1] = CLIPUCHAR(i4_rslt_2);
+        }
+
+        /* x = 15 (i4_x equals 15 when the loop above exits), x_phase = 4 */
+        i4_samp_0 = i4_samp_1;
+        i4_samp_1 = i4_samp_2;
+        i4_samp_2 = i4_samp_3;
+        i4_samp_3 = *pi2_tmp++;
+
+        i4_rslt_1 = i4_samp_0 * i4_coeff_0;
+        i4_rslt_1 += i4_samp_1 * i4_coeff_1;
+        i4_rslt_1 += i4_samp_2 * i4_coeff_2;
+        i4_rslt_1 += i4_samp_3 * i4_coeff_3;
+        i4_rslt_1 += 512;
+
+        i4_rslt_1 >>= 10;
+        pu1_out[i4_x] = CLIPUCHAR(i4_rslt_1);
+        pu1_out += i4_out_stride;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : isvc_vert_interpol_chroma_dyadic                         */
+/*                                                                           */
+/*  Description   : This function takes the reference array buffer & performs*/
+/*                  vertical intra resampling for dyadic scaling ratios for  */
+/*                  chroma for the following ref_lyr_chroma_phase_y_plus1 and*/
+/*                  chroma_phase_y_plus1:                                    */
+/*                      ref_lyr     cur_lyr                                  */
+/*                          0           0                                    */
+/*                          1           0                                    */
+/*                          1           1                                    */
+/*                          1           2                                    */
+/*                          2           1                                    */
+/*                          2           2                                    */
+/*  Inputs        : pu1_inp_buf : ptr to the 6x6 reference sample buffer     */
+/*                  pi2_tmp_filt_buf : ptr to the 6x8 buffer to hold the     */
+/*                                     vertically interpolated data          */
+/*                  i4_phase_0 : y phase for even values of y                */
+/*                  i4_phase_1 : y phase for odd values of y                 */
+/*  Globals       : none                                                     */
+/*  Processing    : it does the interpolation in vertical direction using a
+ *                  2-tap filter with weights (16 - phase, phase). Input rows
+ *                  are DYADIC_REF_W_C samples apart; results are stored
+ *                  unnormalised in pi2_tmp_filt_buf with a row stride of 6. */
+/*  Outputs       : vertically resampled samples                             */
+/*  Returns       : none                                                     */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 12 2010   Nithya          creation                             */
+/*                                                                           */
+/*****************************************************************************/
+void isvc_vert_interpol_chroma_dyadic(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
+                                      WORD32 i4_phase_0, WORD32 i4_phase_1)
+{
+    WORD32 i4_x, i4_y;
+    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
+    WORD32 i4_samp_0, i4_samp_1;
+    WORD32 i4_rslt_1, i4_rslt_2;
+    WORD32 i4_filt_stride, i4_src_stride;
+    UWORD8 *pu1_inp;
+    WORD16 *pi2_tmp;
+
+    i4_coeff_0 = 16 - i4_phase_0; /* coeff_0 / coeff_1 : weights for phase_0 */
+    i4_coeff_1 = i4_phase_0;
+    i4_coeff_2 = 16 - i4_phase_1; /* coeff_2 / coeff_3 : weights for phase_1 */
+    i4_coeff_3 = i4_phase_1;
+
+    pu1_inp = pu1_inp_buf;
+    pi2_tmp = pi2_tmp_filt_buf;
+    i4_filt_stride = 6;
+    i4_src_stride = DYADIC_REF_W_C;
+
+    /* Vertical interpolation: one column at a time, 6 input rows -> 8 rows */
+    for(i4_x = 0; i4_x < 6; i4_x++)
+    {
+        /* y = 0, y_phase = phase_0 */
+        i4_samp_0 = pu1_inp[i4_x];
+        pu1_inp += i4_src_stride;
+        i4_samp_1 = pu1_inp[i4_x];
+        pu1_inp += i4_src_stride;
+
+        /* since y_phase = phase_0 for y = 0 */
+        i4_rslt_1 = i4_samp_0 * i4_coeff_0;
+        i4_rslt_1 += i4_samp_1 * i4_coeff_1;
+
+        pi2_tmp[i4_x] = i4_rslt_1;
+        pi2_tmp += i4_filt_stride;
+
+        for(i4_y = 1; i4_y < 7; i4_y += 2)
+        {
+            i4_samp_0 = i4_samp_1;
+            i4_samp_1 = pu1_inp[i4_x];
+
+            /* y_phase is phase_1 for odd values of y */
+            /* and phase_0 for even values of y       */
+            i4_rslt_1 = i4_samp_0 * i4_coeff_2;
+            i4_rslt_1 += i4_samp_1 * i4_coeff_3;
+
+            i4_rslt_2 = i4_samp_0 * i4_coeff_0;
+            i4_rslt_2 += i4_samp_1 * i4_coeff_1;
+
+            pi2_tmp[i4_x] = i4_rslt_1;
+            pi2_tmp += i4_filt_stride;
+            pi2_tmp[i4_x] = i4_rslt_2;
+            pi2_tmp += i4_filt_stride;
+            pu1_inp += i4_src_stride;
+        }
+
+        /* y = 7, y_phase = phase_1 */
+        i4_samp_0 = i4_samp_1;
+        i4_samp_1 = pu1_inp[i4_x];
+
+        i4_rslt_1 = i4_samp_0 * i4_coeff_2;
+        i4_rslt_1 += i4_samp_1 * i4_coeff_3;
+
+        pi2_tmp[i4_x] = i4_rslt_1;
+
+        pu1_inp = pu1_inp_buf;      /* rewind to the top row for the next column */
+        pi2_tmp = pi2_tmp_filt_buf;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : isvc_horz_interpol_chroma_dyadic                         */
+/*                                                                           */
+/*  Description   : This function takes the reference array buffer & performs*/
+/*                  horizontal intra resampling for dyadic scaling ratios for*/
+/*                  chroma with following ref_lyr_chroma_phase_x_plus1_flag  */
+/*                  and chroma_phase_x_plus1_flag:                           */
+/*                      ref_lyr     cur_lyr                                  */
+/*                          0           0                                    */
+/*                          1           0                                    */
+/*                          1           1                                    */
+/*  Inputs        : 
pi2_tmp_filt_buf : ptr to the 6x8 buffer containing the */
+/*                                     vertically interpolated data          */
+/*                  pu1_out_buf : pointer to the output buffer               */
+/*                  i4_out_stride : output buffer stride                     */
+/*                  i4_phase_0 : x phase for even values of x                */
+/*                  i4_phase_1 : x phase for odd values of x                 */
+/*  Globals       : none                                                     */
+/*  Processing    : it does the interpolation in horizontal direction using a
+ *                  2-tap filter with weights (16 - phase, phase), rounds the
+ *                  result with (x + 128) >> 8 and stores sample x of each
+ *                  row at pu1_out[2 * x], i.e. at every second byte of the
+ *                  output row.                                               */
+/*  Outputs       : resampled samples                                        */
+/*  Returns       : none                                                     */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 12 2010   Nithya          creation                             */
+/*                                                                           */
+/*****************************************************************************/
+void isvc_horz_interpol_chroma_dyadic(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf,
+                                      WORD32 i4_out_stride, WORD32 i4_phase_0, WORD32 i4_phase_1)
+{
+    WORD32 i4_x, i4_y;
+    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
+    WORD32 i4_samp_0, i4_samp_1;
+    WORD32 i4_rslt_1, i4_rslt_2;
+    WORD32 i4_dst_stride;
+    UWORD8 *pu1_out;
+    WORD16 *pi2_tmp;
+
+    i4_coeff_0 = 16 - i4_phase_0; /* coeff_0 / coeff_1 : weights for phase_0 */
+    i4_coeff_1 = i4_phase_0;
+    i4_coeff_2 = 16 - i4_phase_1; /* coeff_2 / coeff_3 : weights for phase_1 */
+    i4_coeff_3 = i4_phase_1;
+
+    pu1_out = pu1_out_buf;
+    pi2_tmp = pi2_tmp_filt_buf;
+    i4_dst_stride = i4_out_stride;
+
+    /* Horizontal interpolation: each row consumes 6 intermediate samples */
+    for(i4_y = 0; i4_y < 8; i4_y++)
+    {
+        /* x = 0, x_phase = phase_0 */
+        i4_samp_0 = *pi2_tmp++;
+        i4_samp_1 = *pi2_tmp++;
+
+        /* since x_phase = phase_0 for x = 0 */
+        i4_rslt_1 = i4_samp_0 * i4_coeff_0;
+        i4_rslt_1 += i4_samp_1 * i4_coeff_1;
+
+        /* Round to 8-bit value */
+        i4_rslt_1 += 128;
+        i4_rslt_1 >>= 8;
+
+        pu1_out[0] = i4_rslt_1;
+
+        for(i4_x = 1; i4_x < 7; i4_x += 2)
+        {
+            i4_samp_0 = i4_samp_1;
+            i4_samp_1 = *pi2_tmp++;
+
+            /* x_phase is phase_1 for odd values of x */
+            /* and phase_0 for even values of x       */
+            i4_rslt_1 = i4_samp_0 * i4_coeff_2;
+            i4_rslt_1 += i4_samp_1 * i4_coeff_3;
+
+            i4_rslt_2 = i4_samp_0 * i4_coeff_0;
+            i4_rslt_2 += i4_samp_1 * i4_coeff_1;
+
+            /* Rounding to 8-bit values */
+            i4_rslt_1 += 128;
+            i4_rslt_1 >>= 8;
+            i4_rslt_2 += 128;
+            i4_rslt_2 >>= 8;
+
+            pu1_out[2 * i4_x] = i4_rslt_1;
+            pu1_out[2 * (i4_x + 1)] = i4_rslt_2;
+        }
+
+        /* x = 7, x_phase = phase_1 */
+        i4_samp_0 = i4_samp_1;
+        i4_samp_1 = *pi2_tmp++;
+
+        /* since x_phase = phase_1 for x = 7 */
+        i4_rslt_1 = i4_samp_0 * i4_coeff_2;
+        i4_rslt_1 += i4_samp_1 * i4_coeff_3;
+
+        /* Round to 8-bit value */
+        i4_rslt_1 += 128;
+        i4_rslt_1 >>= 8;
+
+        pu1_out[2 * 7] = i4_rslt_1;
+        pu1_out += i4_dst_stride;
+    }
+}
+/* isvc_interpolate_intra_base: two-pass interpolation of one MB from the
+ * reference sample array selected by i4_refarray_flag (0 selects
+ * pu1_refarray_buffer, any other value selects pu1_refarray_cb).
+ *   Pass 1 (vertical): for each of the MB's rows and each column in
+ *     [i2_x_min_pos - 1, i2_x_max_pos + 2], filters the reference array with
+ *     4 taps of g_ai1_interp_filter_luma or 2 taps of
+ *     g_au1_interp_filter_chroma into pi4_temp_interpolation_buffer.
+ *   Pass 2 (horizontal): filters that buffer, applies (x + 512) >> 10 and
+ *     CLIPUCHAR, and stores to pu1_out (luma at pu1_out[x], chroma at
+ *     pu1_out[2 * x]).
+ * i4_refarray_wd is used as the row stride of both the reference array and
+ * the temporary buffer. The filter tables are indexed as [16 * tap + phase].
+ * NOTE(review): pass 2 takes its positions from pi4_ref_array_positions_y and
+ * its phase from the .i4_ordinate member, exactly as pass 1 does, and scales
+ * the MB abscissa by MAX_REF_ARR_WD_HT where pass 1 scales by the MB height;
+ * pi4_ref_array_positions_x is never read here. Confirm whether the x
+ * variants were intended. */
+static void isvc_interpolate_intra_base(void *pv_intra_samp_ctxt, UWORD8 *pu1_out,
+                                        WORD32 i4_out_stride, WORD32 i4_refarray_wd,
+                                        WORD32 i4_chroma_flag, WORD32 i4_refarray_flag)
+{
+    intra_sampling_ctxt_t *ps_ctxt;
+    intra_samp_lyr_ctxt *ps_lyr_ctxt;
+    WORD32 i4_x, i4_y;
+    UWORD8 *pu1_refarray;
+    coordinates_t *ps_phase;
+
+    WORD32 i4_temp_array_ht;
+    WORD32 *pi4_interp_buff;
+    WORD32 *pi4_interp_buff_temp;
+
+    WORD32 i4_mb_wd;
+    WORD32 i4_mb_ht;
+
+    WORD32 i4_x_min, i4_x_max;
+
+    ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt;
+    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
+
+    if(0 == i4_refarray_flag)
+    {
+        pu1_refarray = ps_ctxt->pu1_refarray_buffer;
+    }
+    else
+    {
+        pu1_refarray = ps_ctxt->pu1_refarray_cb;
+    }
+
+    i4_mb_wd = MB_SIZE >> i4_chroma_flag;
+    i4_mb_ht = MB_SIZE >> i4_chroma_flag;
+
+    i4_x_min = ps_lyr_ctxt->i2_x_min_pos;
+    i4_x_max = ps_lyr_ctxt->i2_x_max_pos;
+
+    ps_phase = ps_lyr_ctxt->ps_phase;
+
+    i4_temp_array_ht = i4_mb_ht;
+    pi4_interp_buff = ps_ctxt->pi4_temp_interpolation_buffer;
+    pi4_interp_buff_temp = pi4_interp_buff; /* remember the start for pass 2 */
+
+    for(i4_y = 0; i4_y < i4_temp_array_ht; i4_y++)
+    {
+        for(i4_x = (i4_x_min - 1); i4_x <= (i4_x_max + 2); i4_x++)
+        {
+            WORD32 i4_y_ref = ps_lyr_ctxt->pi4_ref_array_positions_y[i4_y];
+            WORD32 i4_y_phase =
+                ps_phase[(ps_lyr_ctxt->ps_mb_pos->i4_ordinate * i4_mb_ht + i4_y) % 3].i4_ordinate;
+            UWORD8 *pu1_refarray_temp = pu1_refarray + i4_x + (i4_y_ref * i4_refarray_wd);
+
+            if(0 == i4_chroma_flag)
+            {
+                *(pi4_interp_buff + i4_x) =
+                    (g_ai1_interp_filter_luma[i4_y_phase]) *
+                        (*(pu1_refarray_temp - i4_refarray_wd)) +
+                    (g_ai1_interp_filter_luma[16 + i4_y_phase]) * (*(pu1_refarray_temp)) +
+                    (g_ai1_interp_filter_luma[32 + i4_y_phase]) *
+                        (*(pu1_refarray_temp + i4_refarray_wd)) +
+                    (g_ai1_interp_filter_luma[48 + i4_y_phase]) *
+                        (*(pu1_refarray_temp + (2 * i4_refarray_wd)));
+            }
+            else
+            {
+                *(pi4_interp_buff + i4_x) =
+                    (g_au1_interp_filter_chroma[i4_y_phase]) * (*(pu1_refarray_temp)) +
+                    (g_au1_interp_filter_chroma[16 + i4_y_phase]) *
+                        (*(pu1_refarray_temp + i4_refarray_wd));
+            }
+        }
+
+        pi4_interp_buff = pi4_interp_buff + i4_refarray_wd;
+    }
+
+    pi4_interp_buff = pi4_interp_buff_temp;
+
+    for(i4_y = 0; i4_y < i4_temp_array_ht; i4_y++)
+    {
+        for(i4_x = 0; i4_x < i4_mb_wd; i4_x++)
+        {
+            WORD32 i4_x_ref = ps_lyr_ctxt->pi4_ref_array_positions_y[i4_x]; /* NOTE(review): _y */
+            WORD32 i4_x_phase =
+                ps_phase[(ps_lyr_ctxt->ps_mb_pos->i4_abscissa * MAX_REF_ARR_WD_HT + i4_x) % 3]
+                    .i4_ordinate; /* NOTE(review): ordinate used for the x phase */
+
+            pi4_interp_buff_temp = pi4_interp_buff + i4_x_ref;
+
+            if(0 == i4_chroma_flag)
+            {
+                *(pu1_out + i4_x + (i4_y * i4_out_stride)) = CLIPUCHAR(
+                    ((g_ai1_interp_filter_luma[i4_x_phase]) * (*(pi4_interp_buff_temp - 1)) +
+                     (g_ai1_interp_filter_luma[16 + i4_x_phase]) * (*(pi4_interp_buff_temp)) +
+                     (g_ai1_interp_filter_luma[32 + i4_x_phase]) * (*(pi4_interp_buff_temp + 1)) +
+                     (g_ai1_interp_filter_luma[48 + i4_x_phase]) * (*(pi4_interp_buff_temp + 2)) +
+                     512) >>
+                    10);
+            }
+            else
+            {
+                *(pu1_out + (2 * i4_x) + (i4_y * i4_out_stride)) = CLIPUCHAR(
+                    ((g_au1_interp_filter_chroma[i4_x_phase]) * (*(pi4_interp_buff_temp)) +
+                     (g_au1_interp_filter_chroma[16 + i4_x_phase]) * (*(pi4_interp_buff_temp + 1)) +
+                     512) >>
+                    10);
+            }
+        }
+
+        pi4_interp_buff = pi4_interp_buff + i4_refarray_wd;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : isvc_intra_samp_mb_dyadic                                */
+/*                                                                           */
+/*  Description   : MB level function which performs the intra 
resampling */
+/*                  of data of an MB (luma and chroma inclusive) for dyadic  */
+/*                  scaling ratios                                           */
+/*                                                                           */
+/*  Inputs        : pv_intra_samp_ctxt : intra sampling context              */
+/*                  ps_ref_luma : reference layer luma data buffer desc      */
+/*                  ps_ref_chroma : reference layer chroma data buffer desc  */
+/*                  ps_ref_mb_mode_map : ref layer mb mode map buff desc     */
+/*                  ps_curr_luma : current layer out luma buffer desc        */
+/*                  ps_curr_chroma : current layer out chroma buffer desc    */
+/*                  u2_mb_x, u2_mb_y : current mb coordinate
+ *                  i4_scaled_ref_layer_left_offset,
+ *                  i4_scaled_ref_layer_top_offset : offsets whose value >> 4
+ *                      is subtracted from the mb coordinate                  */
+/*  Globals       : none                                                     */
+/*  Processing    : it calls the reference layer construction followed by    */
+/*                  interpolation function for luma and cb and cr            */
+/*  Outputs       : intra resampled data of current MB                       */
+/*  Returns       : none                                                     */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         07 12 2010   Nithya          creation                             */
+/*                                                                           */
+/*****************************************************************************/
+void isvc_intra_samp_mb_dyadic(void *pv_intra_samp_ctxt, mem_element_t *ps_ref_luma,
+                               mem_element_t *ps_ref_chroma, mem_element_t *ps_ref_mb_mode_map,
+                               mem_element_t *ps_curr_luma, mem_element_t *ps_curr_chroma,
+                               UWORD16 u2_mb_x, UWORD16 u2_mb_y,
+                               WORD32 i4_scaled_ref_layer_left_offset,
+                               WORD32 i4_scaled_ref_layer_top_offset)
+{
+    UWORD8 *pu1_inp_luma, *pu1_inp_chroma;
+    UWORD8 *pu1_out_luma, *pu1_out_chroma;
+    UWORD8 *pu1_out_cb, *pu1_out_cr;
+    UWORD8 *pu1_refarray_luma, *pu1_refarray_cb, *pu1_refarray_cr;
+    WORD16 *pi2_tmp_filt_buf;
+    WORD32 i4_inp_luma_stride, i4_inp_chroma_stride;
+    WORD32 i4_out_luma_stride, i4_out_chroma_stride;
+    UWORD16 u2_mb_x_ref, u2_mb_y_ref;
+    intra_sampling_ctxt_t *ps_ctxt;
+    intra_samp_lyr_ctxt *ps_lyr_ctxt;
+    WORD32 i4_scaled_mb_x, i4_scaled_mb_y;
+    WORD32 i4_top, i4_left;
+
+    ps_ctxt = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt;
+    ps_lyr_ctxt = &ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id];
+
+    /* MB coordinates relative to the scaled reference layer origin */
+    i4_scaled_mb_x = u2_mb_x - (i4_scaled_ref_layer_left_offset >> 4);
+    i4_scaled_mb_y = u2_mb_y - (i4_scaled_ref_layer_top_offset >> 4);
+
+    /* i4_left / i4_top are +1 for odd and -1 for even scaled MB indices */
+    if(i4_scaled_mb_x & 0x1)
+    {
+        i4_left = 1;
+    }
+    else
+    {
+        i4_left = -1;
+    }
+    if(i4_scaled_mb_y & 0x1)
+    {
+        i4_top = 1;
+    }
+    else
+    {
+        i4_top = -1;
+    }
+
+    /* two current-layer MBs map onto one reference-layer MB per direction */
+    u2_mb_x_ref = (i4_scaled_mb_x >> 1);
+    u2_mb_y_ref = (i4_scaled_mb_y >> 1);
+
+    pu1_inp_luma = (UWORD8 *) ps_ref_luma->pv_buffer;
+    pu1_inp_chroma = (UWORD8 *) ps_ref_chroma->pv_buffer;
+
+    i4_inp_luma_stride = ps_ref_luma->i4_num_element_stride;
+    i4_inp_chroma_stride = ps_ref_chroma->i4_num_element_stride;
+
+    /* ------- Constructing refSampleArray ----------------------- */
+    isvc_reflayer_construction_dyadic(pv_intra_samp_ctxt, ps_ref_mb_mode_map, pu1_inp_luma,
+                                      pu1_inp_chroma, i4_inp_luma_stride, i4_inp_chroma_stride,
+                                      i4_top, i4_left, u2_mb_x_ref, u2_mb_y_ref);
+
+    /* --------------------------------------------------------------------- */
+    /* LUMA INTERPOLATION                                                    */
+    /* --------------------------------------------------------------------- */
+    pu1_refarray_luma = ps_ctxt->pu1_refarray_buffer;
+    if(1 == i4_top)
+    {
+        pu1_refarray_luma += (DYADIC_REF_W_Y << 3); /* skip 8 rows */
+    }
+    if(1 == i4_left)
+    {
+        pu1_refarray_luma += 8; /* skip 8 columns */
+    }
+    pu1_out_luma = (UWORD8 *) ps_curr_luma->pv_buffer;
+    i4_out_luma_stride = ps_curr_luma->i4_num_element_stride;
+    /* the WORD32 temp buffer is reused as WORD16 scratch by the 2x kernels */
+    pi2_tmp_filt_buf = (WORD16 *) ps_ctxt->pi4_temp_interpolation_buffer;
+
+    ps_lyr_ctxt->pf_interpolate_luma(pu1_refarray_luma, pi2_tmp_filt_buf, pu1_out_luma,
+                                     i4_out_luma_stride);
+
+    /* --------------------------------------------------------------------- */
+    /* CHROMA INTERPOLATION                                                  */
+    /* --------------------------------------------------------------------- */
+    pu1_out_chroma = (UWORD8 *) ps_curr_chroma->pv_buffer;
+    i4_out_chroma_stride = ps_curr_chroma->i4_num_element_stride;
+
+    /* CB : written starting at byte 0 of the chroma output */
+    pu1_out_cb = pu1_out_chroma;
+    pu1_refarray_cb = ps_ctxt->pu1_refarray_cb;
+
+    if(1 == i4_top)
+    {
+        pu1_refarray_cb += (DYADIC_REF_W_C << 2); /* skip 4 rows */
+    }
+    if(1 == i4_left)
+    {
+        pu1_refarray_cb += 4; /* skip 4 columns */
+    }
+
+    /* Vertical interpolation */
+    ps_lyr_ctxt->pf_vert_interpol_chroma(pu1_refarray_cb, pi2_tmp_filt_buf,
+                                         ps_lyr_ctxt->i4_y_phase_0, ps_lyr_ctxt->i4_y_phase_1);
+
+    /* Horizontal interpolation */
+    ps_lyr_ctxt->pf_horz_interpol_chroma(pi2_tmp_filt_buf, pu1_out_cb, i4_out_chroma_stride,
+                                         ps_lyr_ctxt->i4_x_phase_0, ps_lyr_ctxt->i4_x_phase_1);
+
+    /* CR : written starting at byte 1 of the chroma output */
+    pu1_out_cr = pu1_out_chroma + 1;
+    pu1_refarray_cr = ps_ctxt->pu1_refarray_cr;
+
+    if(1 == i4_top)
+    {
+        pu1_refarray_cr += (DYADIC_REF_W_C << 2);
+    }
+    if(1 == i4_left)
+    {
+        pu1_refarray_cr += 4;
+    }
+
+    /* Vertical interpolation */
+    ps_lyr_ctxt->pf_vert_interpol_chroma(pu1_refarray_cr, pi2_tmp_filt_buf,
+                                         ps_lyr_ctxt->i4_y_phase_0, ps_lyr_ctxt->i4_y_phase_1);
+
+    /* Horizontal interpolation */
+    ps_lyr_ctxt->pf_horz_interpol_chroma(pi2_tmp_filt_buf, pu1_out_cr, i4_out_chroma_stride,
+                                         ps_lyr_ctxt->i4_x_phase_0, ps_lyr_ctxt->i4_x_phase_1);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : isvc_intra_samp_mb                                       */
+/*                                                                           */
+/*  Description   : MB level function which performs the intra resampling    */
+/*                  of data of an MB (luma and chroma inclusive)             */
+/*                                                                           */
+/*  Inputs        : pv_intra_samp_ctxt_luma : intra sampling context (luma)  */
+/*                  ps_ref_luma : reference layer luma data buffer desc      */
+/*                  ps_ref_chroma : reference layer chroma data buffer desc  */
+/*                  ps_ref_mb_mode_map : ref layer mb mode map buff desc     */
+/*                  ps_curr_luma : current layer out luma buffer desc        */
+/*                  ps_curr_chroma : current layer out chroma buffer desc    */
+/*                  pv_intra_samp_ctxt_chroma : intra sampling ctxt (chroma) */
+/*  Globals       : none                                                     */
+/*  Processing    : it calls the reference layer construction followed by    */
+/*                  interpolation function for luma and cb and cr            */
+/*  Outputs       : intra resampled data of current MB                       */
+/*  Returns       : none                                                     */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         07 12 2010   Nithya          creation                             */
+/*                                                                           */
+/*****************************************************************************/
+void isvc_intra_samp_mb(void *pv_intra_samp_ctxt_luma, void *pv_intra_samp_ctxt_chroma,
+                        mem_element_t *ps_ref_luma, mem_element_t *ps_ref_chroma,
+                        mem_element_t *ps_ref_mb_mode_map, mem_element_t *ps_curr_luma,
+                        mem_element_t *ps_curr_chroma)
+{
+    UWORD8 *pu1_inp_luma, *pu1_inp_chroma;
+    UWORD8 *pu1_out_luma, *pu1_out_chroma;
+    UWORD8 *pu1_out_cb, *pu1_out_cr;
+    WORD32 i4_inp_luma_stride, i4_inp_chroma_stride;
+    WORD32 i4_out_luma_stride, i4_out_chroma_stride;
+    WORD32 i4_chroma_flag, i4_refarray_stride;
+
+    intra_sampling_ctxt_t *ps_ctxt_luma;
+    intra_sampling_ctxt_t *ps_ctxt_chroma;
+
+    ps_ctxt_luma = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt_luma;
+    ps_ctxt_chroma = (intra_sampling_ctxt_t *) pv_intra_samp_ctxt_chroma;
+
+    i4_refarray_stride = ps_ctxt_luma->i4_refarray_stride; /* luma ref-array stride */
+
+    pu1_inp_luma = (UWORD8 *) ps_ref_luma->pv_buffer;
+    pu1_inp_chroma = (UWORD8 *) ps_ref_chroma->pv_buffer;
+
+    i4_inp_luma_stride = ps_ref_luma->i4_num_element_stride;
+    i4_inp_chroma_stride = ps_ref_chroma->i4_num_element_stride;
+
+    pu1_out_luma = (UWORD8 *) ps_curr_luma->pv_buffer;
+    i4_out_luma_stride = ps_curr_luma->i4_num_element_stride;
+
+    i4_chroma_flag = 0; /* luma pass */
+
+    /* ------- Constructing refSampleArray ----------------------- */
+    isvc_reflayer_construction(pv_intra_samp_ctxt_luma, pu1_inp_luma, i4_inp_luma_stride,
+                               i4_refarray_stride, ps_ref_mb_mode_map, i4_chroma_flag);
+
+    /* ---- Interpolation process for Intra_Base prediction ------ */
+    isvc_interpolate_intra_base(pv_intra_samp_ctxt_luma, pu1_out_luma, i4_out_luma_stride,
+                                i4_refarray_stride, i4_chroma_flag, 0);
+
+    pu1_out_chroma = (UWORD8 *) ps_curr_chroma->pv_buffer;
+    i4_out_chroma_stride = ps_curr_chroma->i4_num_element_stride;
+
+    pu1_out_cb = pu1_out_chroma;
+    pu1_out_cr = pu1_out_cb + 1; /* Cr output starts one byte after Cb */
+
+    i4_refarray_stride = ps_ctxt_chroma->i4_refarray_stride; /* chroma ref-array stride */
+
+    i4_chroma_flag = 1; /* chroma pass */
+
+    /* ------- Constructing refSampleArray (both chroma arrays) -------- */
+    isvc_reflayer_construction(pv_intra_samp_ctxt_chroma, pu1_inp_chroma, i4_inp_chroma_stride,
+                               i4_refarray_stride, ps_ref_mb_mode_map, i4_chroma_flag);
+
+    /* ---- Cb Interpolation process for Intra_Base prediction ------ */
+    /* last argument 0 : read from pu1_refarray_buffer                */
+    isvc_interpolate_intra_base(pv_intra_samp_ctxt_chroma, pu1_out_cb, i4_out_chroma_stride,
+                                i4_refarray_stride, i4_chroma_flag, 0);
+
+    /* ---- Cr Interpolation process for Intra_Base prediction ------ */
+    /* last argument 1 : read from pu1_refarray_cb                    */
+    isvc_interpolate_intra_base(pv_intra_samp_ctxt_chroma, pu1_out_cr, i4_out_chroma_stride,
+                                i4_refarray_stride, i4_chroma_flag, 1);
+}
diff --git a/common/svc/isvc_intra_resample.h b/common/svc/isvc_intra_resample.h
new file mode 100644
index 0000000..b78055c
--- /dev/null
+++ b/common/svc/isvc_intra_resample.h
@@ -0,0 +1,251 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +#ifndef _ISVC_INTRA_RESAMPLE_H_ +#define _ISVC_INTRA_RESAMPLE_H_ + +#include "ih264_typedefs.h" +#include "isvc_macros.h" +#include "ih264_platform_macros.h" +#include "isvc_structs.h" + +#define DYADIC_REF_W_Y 20 +#define DYADIC_REF_H_Y 20 +#define DYADIC_REF_W_C 10 +#define DYADIC_REF_H_C 10 + +#define MAX_NUM_RES_LYRS 4 + +#define MAX_PIX_FILL_LUMA 4 +#define MAX_PIX_FILL_CHROMA 2 + +#define MAX_REF_ARR_WD_HT 48 +#define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_SIZE) + +#define CLIPUCHAR(x) CLIP3(0, 255, (x)) + +#define REF_ARRAY_WIDTH 48 +#define REF_ARRAY_HEIGHT 48 + +typedef void FT_INTERPOLATE_LUMA_2X(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf, + UWORD8 *pu1_out_buf, WORD32 i4_out_stride); + +typedef void FT_VERT_INTERPOLATE_CHROMA_2X(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf, + WORD32 i4_phase_0, WORD32 i4_phase_1); + +typedef void FT_HORZ_INTERPOLATE_CHROMA_2X(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf, + WORD32 i4_out_stride, WORD32 i4_phase_0, + WORD32 i4_phase_1); + +typedef struct mem_element_t +{ + /* Buffer pointer */ + void *pv_buffer; + + /* size of the structure or unit */ + WORD32 i4_element_size; + + /* Stride of buffer in terms of number of elements.*/ + WORD32 i4_num_element_stride; +} mem_element_t; + +typedef struct seg_description_t +{ + /* describes segment dimension */ + UWORD8 u1_seg_dim; + + /* describes offset from start */ + UWORD8 u1_seg_off; + + /* describes whether mb is adjoining the segment + 0 => not adjoining 1 => adjoining */ + UWORD8 u1_mb_adjoin; + + /* distance to nearest MB */ + WORD8 i1_dist_idx; + + /* describes the nearest mb boundary + +1 => rightMB/bottomMB + -1 => leftMB/topMB */ + WORD8 i1_nearst_mb_bdry; +} seg_description_t; + +typedef struct seg_lookup_desc_t +{ + /* place holder to store the number of segments */ + UWORD8 u1_num_segments; + + /* this variable indicates where is start locatiion of the segment with + respect to less the block_width or greater than block width*/ + 
UWORD8 u4_start_pos; + + /* place holder to store per segment description */ + seg_description_t s_segments[4]; +} seg_lookup_desc_t; + +typedef struct intra_samp_lyr_ctxt +{ + /* mb position */ + coordinates_t *ps_mb_pos; + + /* reference layer width in terms luma samples */ + WORD32 i4_ref_width; + + /* reference layer height in terms luma samples */ + WORD32 i4_ref_height; + + /* Constrained intra resampling flag. Range is [0,1]. */ + WORD8 i1_constrained_intra_rsmpl_flag; + + /* Chroma xPhase for even values of x for dyadic cases */ + WORD32 i4_x_phase_0; + + /* Chroma xPhase for odd values of x for dyadic cases */ + WORD32 i4_x_phase_1; + + /* Chroma yPhase for even values of y for dyadic cases */ + WORD32 i4_y_phase_0; + + /* Chroma yPhase for odd values of y for dyadic cases */ + WORD32 i4_y_phase_1; + + FT_INTERPOLATE_LUMA_2X *pf_interpolate_luma; + + FT_VERT_INTERPOLATE_CHROMA_2X *pf_vert_interpol_chroma; + + FT_HORZ_INTERPOLATE_CHROMA_2X *pf_horz_interpol_chroma; + + WORD16 i2_x_min_pos; + + WORD16 i2_x_max_pos; + + WORD16 i2_y_min_pos; + + WORD16 i2_y_max_pos; + + coordinates_t *ps_phase; + + WORD32 *pi4_ref_array_positions_x; + + WORD32 *pi4_ref_array_positions_y; + + coordinates_t *ps_offsets; + + coordinates_t *ps_ref_array_dims; + + /* buffers to store lookup for horizontal segment description */ + seg_lookup_desc_t as_seg_lookup_horz[MB_SIZE]; + + /* buffers to store lookup for vertical segment description */ + seg_lookup_desc_t as_seg_lookup_vert[MB_SIZE]; + + /* buffers to store lookup for x indexes to get + availability from 4x4 availability grid */ + UWORD8 au1_refarray_x_idx[MAX_REF_IDX_ARRAY]; + + /* buffers to store lookup for y indexes to get + availability from 4x4 availability grid */ + UWORD8 au1_refarray_y_idx[MAX_REF_IDX_ARRAY]; +} intra_samp_lyr_ctxt; + +typedef struct intra_sampling_ctxt_t +{ + /* Array of resolution layer ctxt. 
*/ + intra_samp_lyr_ctxt as_res_lyrs[MAX_NUM_RES_LYRS]; + + /* pointer to array of SPS */ + void *ps_sps; + + /* buffer to store the reference layer data before intra sampling */ + UWORD8 *pu1_refarray_buffer; + + /* buffer to hold the reference layer Cb data before intra + resampling (used for dyadic cases only) */ + UWORD8 *pu1_refarray_cb; + + /* buffer to hold the reference layer Cr data before intra + resampling (used for dyadic cases only) */ + UWORD8 *pu1_refarray_cr; + + /* intermediate buffer for interpolation */ + WORD32 *pi4_temp_interpolation_buffer; + + /* resolution id of the layer which is to be processed */ + WORD32 i4_res_lyr_id; + + /* reference layer width in terms of luma samples */ + WORD32 i4_ref_width; + + /* stride of the reference array; NOTE(review): presumably of pu1_refarray_buffer - confirm */ + WORD32 i4_refarray_stride; + + /* reference layer height in terms of luma samples */ + WORD32 i4_ref_height; +} intra_sampling_ctxt_t; + +typedef struct inter_lyr_mb_prms_t +{ + /* NNZs of Chroma. Here each bit corresponds + to the NNZ of a 4x4 sub block. Lower 4 bits are + used for Cb and upper are used for Cr */ + UWORD8 u1_chroma_nnz; + + /* NNZs of Luma. Here each bit corresonds + to a NNZs of 4x4 sub block in raster scan order. 
*/ + UWORD16 u2_luma_nnz; + + /* Packed MB mode transform size of an MB */ + WORD8 i1_mb_mode; +} inter_lyr_mb_prms_t; + +/* Function declarations */ +extern void isvc_intra_samp_mb_dyadic(void *pv_intra_samp_ctxt, mem_element_t *ps_ref_luma, + mem_element_t *ps_ref_chroma, + mem_element_t *ps_ref_mb_mode_map, + mem_element_t *ps_curr_luma, mem_element_t *ps_curr_chroma, + UWORD16 u2_mb_x, UWORD16 u2_mb_y, + WORD32 i4_scaled_ref_layer_left_offset, + WORD32 i4_scaled_ref_layer_top_offset); + +extern void isvc_intra_samp_mb(void *pv_intra_samp_ctxt_luma, void *pv_intra_samp_ctxt_chroma, + mem_element_t *ps_ref_luma, mem_element_t *ps_ref_chroma, + mem_element_t *ps_ref_mb_mode_map, mem_element_t *ps_curr_luma, + mem_element_t *ps_curr_chroma); + +extern void isvc_intra_resamp_generate_segment_lookup(seg_lookup_desc_t *ps_seg_lookup_table, + WORD32 i4_dimension, WORD32 i4_mb_size, + WORD32 i4_shift_val); + +/* C Declarations */ +extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic; +extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic; +extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic; + +/* SSE42 Declarations */ +extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic_sse42; +extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic_sse42; +extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic_sse42; + +/* NEON Declarations */ +extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic_neon; +extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic_neon; +extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic_neon; + +#endif diff --git a/common/svc/isvc_iquant_itrans_recon.c b/common/svc/isvc_iquant_itrans_recon.c new file mode 100644 index 0000000..a3a25d5 --- /dev/null +++ b/common/svc/isvc_iquant_itrans_recon.c @@ -0,0 +1,1094 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android 
Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** + ******************************************************************************* + * @file + * isvc_iquant_itrans_recon.c + * + * @brief + * Contains definitions of functions for h264 inverse quantization, inverse + * transformation and recon + * + * @author + * Ittiam + * + * @par List of Functions: + * - isvc_iquant_itrans_recon_4x4() + * - isvc_iquant_itrans_recon_8x8() + * - isvc_iquant_itrans_recon_4x4_dc() + * - ih264_iquant_itrans_recon_8x8_dc() + * - isvc_iquant_itrans_recon_chroma_4x4() + * - isvc_iquant_itrans_recon_chroma_4x4_dc() + * + * @remarks + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include + +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "ih264_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" + +/* + 
******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized resiude and + * prediction buffer + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed. + * This inverse transformed content is added to the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_src + * quantized 4x4 block + * + * @param[in] pu1_pred + * prediction 4x4 block + * + * @param[in] pi2_res + * residue 4x4 block + * + * @param[out] pu1_out + * reconstructed 4x4 block + * + * @param[in] src_strd + * quantization buffer stride + * + * @param[in] i4_pred_stride, + * Prediction buffer stride + * + * @param[in] i4_out_stride + * recon buffer Stride + * + * @param[in] i4_res_stride + * residue buffer Stride + * + * @param[in] pu2_scaling_list + * pointer to scaling list + * + * @param[in] pu2_norm_adjust + * pointer to inverse scale matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi2_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void isvc_iquant_itrans_recon_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res_pred, buffer_container_t *ps_res, + buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, + WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, + UWORD8 u1_res_accumulate) +{ + WORD16 x0, x1, x2, x3, i; + WORD32 q0, q1, q2, q3; + WORD16 i_macro; + + WORD16 *pi2_src = ps_src->pv_data; + WORD16 *pi2_res = ps_res->pv_data; + WORD16 *pi2_res_pred = ps_res_pred->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + UWORD8 *pu1_out = ps_rec->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 
i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + WORD16 *pi2_src_ptr = pi2_src; + WORD16 *pi2_tmp_ptr = pi2_tmp; + UWORD8 *pu1_pred_ptr = pu1_pred; + WORD16 *pi2_res_ptr = pi2_res; + WORD16 *pi2_res_pred_ptr = pi2_res_pred; + UWORD8 *pu1_out_ptr = pu1_out; + WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; + + /* inverse quant */ + /*horizontal inverse transform */ + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + q0 = pi2_src_ptr[0]; + INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); + if(i == 0 && i4_iq_start_idx == 1) q0 = pi2_dc_src[0]; // Restoring dc value for intra case + + q2 = pi2_src_ptr[2]; + INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4); + + x0 = q0 + q2; + x1 = q0 - q2; + + q1 = pi2_src_ptr[1]; + INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4); + + q3 = pi2_src_ptr[3]; + INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4); + + x2 = (q1 >> 1) - q3; + x3 = q1 + (q3 >> 1); + + pi2_tmp_ptr[0] = x0 + x3; + pi2_tmp_ptr[1] = x1 + x2; + pi2_tmp_ptr[2] = x1 - x2; + pi2_tmp_ptr[3] = x0 - x3; + + pi2_src_ptr += i4_src_stride; + pi2_tmp_ptr += SUB_BLK_WIDTH_4x4; + pu2_iscal_mat += SUB_BLK_WIDTH_4x4; + pu2_weigh_mat += SUB_BLK_WIDTH_4x4; + } + + /* vertical inverse transform */ + pi2_tmp_ptr = pi2_tmp; + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + pu1_pred_ptr = pu1_pred; + pi2_res_ptr = pi2_res; + pi2_res_pred_ptr = pi2_res_pred; + pu1_out = pu1_out_ptr; + + x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]); + x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]); + x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12]; + x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1); + + /* inverse prediction */ + i_macro = x0 + x3; + 
i_macro = ((i_macro + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = x1 + x2; + i_macro = ((i_macro + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = x1 - x2; + i_macro = ((i_macro + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = x0 - x3; + i_macro = ((i_macro + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + + pi2_tmp_ptr++; + pu1_out_ptr++; + pu1_pred++; + pi2_res++; + pi2_res_pred++; + } +} + +void isvc_iquant_itrans_recon_4x4_dc(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res_pred, buffer_container_t *ps_res, + buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, + WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, + UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = ps_src->pv_data; + WORD16 *pi2_res = ps_res->pv_data; + WORD16 *pi2_res_pred = ps_res_pred->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + UWORD8 *pu1_out = ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = 
ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + UWORD8 *pu1_pred_ptr = pu1_pred; + WORD16 *pi2_res_ptr = pi2_res; + WORD16 *pi2_res_pred_ptr = pi2_res_pred; + UWORD8 *pu1_out_ptr = pu1_out; + WORD32 q0; + WORD16 i_macro, i; + WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; + UNUSED(pi2_tmp); + + if(i4_iq_start_idx == 0) + { + q0 = pi2_src[0]; + INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); + } + else + { + q0 = pi2_dc_src[0]; // Restoring dc value for intra case3 + } + i_macro = ((q0 + 32) >> 6); + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + pu1_pred_ptr = pu1_pred; + pi2_res_ptr = pi2_res; + pi2_res_pred_ptr = pi2_res_pred; + pu1_out = pu1_out_ptr; + + /* inverse prediction */ + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + + pu1_out_ptr++; + pu1_pred++; + pi2_res++; + pi2_res_pred++; + } +} + +/** + 
******************************************************************************* + * + * @brief + * This function performs inverse quant and Inverse transform type Ci8 for an 8x8 + * block + * + * @par Description: + * Performs inverse transform Ci8 and adds the residue to get the + * reconstructed block + * + * @param[in] pi2_src + * Input 8x8 coefficients + * + * @param[in] pu1_pred + * Prediction 8x8 block + * + * @param[out] pu1_recon + * Output 8x8 block + * + * @param[in] q_div + * QP/6 + * + * @param[in] q_rem + * QP%6 + * + * @param[in] q_lev + * Quantizer level + * + * @param[in] src_strd + * Input stride + * + * @param[in] i4_pred_stride, + * Prediction stride + * + * @param[in] i4_out_stride + * Output Stride + * + * @param[in] pi2_tmp + * temporary buffer; must hold all 64 dequantized coefficients of the 8x8 + * block, which are written to it before the transform + * + * @param[in] pu4_iquant_mat + * Pointer to the inverse quantization matrix + * + * @returns Void + * + * @remarks + * None + * + ******************************************************************************* + */ +void isvc_iquant_itrans_recon_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res_pred, buffer_container_t *ps_res, + buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, + WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, + UWORD8 u1_res_accumulate) +{ + WORD32 i; + WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7; + WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7; + WORD16 i_macro; + WORD32 q; + + WORD16 *pi2_src = ps_src->pv_data; + WORD16 *pi2_res = ps_res->pv_data; + WORD16 *pi2_res_pred = ps_res_pred->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + UWORD8 *pu1_out = ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 
*pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + WORD16 *pi2_tmp_ptr = pi2_tmp; + UWORD8 *pu1_pred_ptr = pu1_pred; + WORD16 *pi2_res_ptr = pi2_res; + WORD16 *pi2_res_pred_ptr = pi2_res_pred; + UWORD8 *pu1_out_ptr = pu1_out; + WORD32 rnd_fact = (u4_qp_div_6 < 6) ? (1 << (5 - u4_qp_div_6)) : 0; + UNUSED(i4_iq_start_idx); + UNUSED(pi2_dc_src); + + ASSERT(ps_src->i4_data_stride == SUB_BLK_WIDTH_8x8); + + /*************************************************************/ + /* De quantization of coefficients. Will be replaced by SIMD */ + /* operations on platform. Note : DC coeff is not scaled */ + /*************************************************************/ + for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++) + { + q = pi2_src[i]; + INV_QUANT(q, pu2_iscal_mat[i], pu2_weigh_mat[i], u4_qp_div_6, rnd_fact, 6); + pi2_tmp_ptr[i] = q; + } + + /* Perform Inverse transform */ + /*--------------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*--------------------------------------------------------------------*/ + for(i = 0; i < SUB_BLK_WIDTH_8x8; i++) + { + /*------------------------------------------------------------------*/ + /* y0 = w0 + w4 */ + /* y1 = -w3 + w5 - w7 - (w7 >> 1) */ + /* y2 = w0 - w4 */ + /* y3 = w1 + w7 - w3 - (w3 >> 1) */ + /* y4 = (w2 >> 1) - w6 */ + /* y5 = -w1 + w7 + w5 + (w5 >> 1) */ + /* y6 = w2 + (w6 >> 1) */ + /* y7 = w3 + w5 + w1 + (w1 >> 1) */ + /*------------------------------------------------------------------*/ + i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4]); + + i_y1 = + ((WORD32) (-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7] - (pi2_tmp_ptr[7] >> 1)); + + i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4]); + + i_y3 = ((WORD32) pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3] - (pi2_tmp_ptr[3] >> 1)); + + i_y4 = ((pi2_tmp_ptr[2] >> 1) - 
pi2_tmp_ptr[6]); + + i_y5 = + ((WORD32) (-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5] + (pi2_tmp_ptr[5] >> 1)); + + i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1)); + + i_y7 = ((WORD32) pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1] + (pi2_tmp_ptr[1] >> 1)); + + /*------------------------------------------------------------------*/ + /* z0 = y0 + y6 */ + /* z1 = y1 + (y7 >> 2) */ + /* z2 = y2 + y4 */ + /* z3 = y3 + (y5 >> 2) */ + /* z4 = y2 - y4 */ + /* z5 = (y3 >> 2) - y5 */ + /* z6 = y0 - y6 */ + /* z7 = y7 - (y1 >> 2) */ + /*------------------------------------------------------------------*/ + i_z0 = i_y0 + i_y6; + i_z1 = i_y1 + (i_y7 >> 2); + i_z2 = i_y2 + i_y4; + i_z3 = i_y3 + (i_y5 >> 2); + i_z4 = i_y2 - i_y4; + i_z5 = (i_y3 >> 2) - i_y5; + i_z6 = i_y0 - i_y6; + i_z7 = i_y7 - (i_y1 >> 2); + + /*------------------------------------------------------------------*/ + /* x0 = z0 + z7 */ + /* x1 = z2 + z5 */ + /* x2 = z4 + z3 */ + /* x3 = z6 + z1 */ + /* x4 = z6 - z1 */ + /* x5 = z4 - z3 */ + /* x6 = z2 - z5 */ + /* x7 = z0 - z7 */ + /*------------------------------------------------------------------*/ + pi2_tmp_ptr[0] = i_z0 + i_z7; + pi2_tmp_ptr[1] = i_z2 + i_z5; + pi2_tmp_ptr[2] = i_z4 + i_z3; + pi2_tmp_ptr[3] = i_z6 + i_z1; + pi2_tmp_ptr[4] = i_z6 - i_z1; + pi2_tmp_ptr[5] = i_z4 - i_z3; + pi2_tmp_ptr[6] = i_z2 - i_z5; + pi2_tmp_ptr[7] = i_z0 - i_z7; + + /* move to the next row */ + // pi2_src_ptr += SUB_BLK_WIDTH_8x8; + pi2_tmp_ptr += SUB_BLK_WIDTH_8x8; + } + /*--------------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to reconstructed frame buffer */ + /* [Prediction buffer itself in this case] */ + /*--------------------------------------------------------------------*/ + + pi2_tmp_ptr = pi2_tmp; + for(i = 0; i < SUB_BLK_WIDTH_8x8; i++) + { + pu1_pred_ptr = pu1_pred; + pi2_res_ptr = pi2_res; + pi2_res_pred_ptr = 
pi2_res_pred; + pu1_out = pu1_out_ptr; + /*------------------------------------------------------------------*/ + /* y0j = w0j + w4j */ + /* y1j = -w3j + w5j -w7j -(w7j >> 1) */ + /* y2j = w0j -w4j */ + /* y3j = w1j + w7j -w3j -(w3j >> 1) */ + /* y4j = ( w2j >> 1 ) -w6j */ + /* y5j = -w1j + w7j + w5j + (w5j >> 1) */ + /* y6j = w2j + ( w6j >> 1 ) */ + /* y7j = w3j + w5j + w1j + (w1j >> 1) */ + /*------------------------------------------------------------------*/ + i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32]; + + i_y1 = (WORD32) (-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56] - + (pi2_tmp_ptr[56] >> 1); + + i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32]; + + i_y3 = (WORD32) pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24] - (pi2_tmp_ptr[24] >> 1); + + i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48]; + + i_y5 = + (WORD32) (-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40] + (pi2_tmp_ptr[40] >> 1); + + i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1); + + i_y7 = (WORD32) pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8] + (pi2_tmp_ptr[8] >> 1); + + /*------------------------------------------------------------------*/ + /* z0j = y0j + y6j */ + /* z1j = y1j + (y7j >> 2) */ + /* z2j = y2j + y4j */ + /* z3j = y3j + (y5j >> 2) */ + /* z4j = y2j -y4j */ + /* z5j = (y3j >> 2) -y5j */ + /* z6j = y0j -y6j */ + /* z7j = y7j -(y1j >> 2) */ + /*------------------------------------------------------------------*/ + i_z0 = i_y0 + i_y6; + i_z1 = i_y1 + (i_y7 >> 2); + i_z2 = i_y2 + i_y4; + i_z3 = i_y3 + (i_y5 >> 2); + i_z4 = i_y2 - i_y4; + i_z5 = (i_y3 >> 2) - i_y5; + i_z6 = i_y0 - i_y6; + i_z7 = i_y7 - (i_y1 >> 2); + + /*------------------------------------------------------------------*/ + /* x0j = z0j + z7j */ + /* x1j = z2j + z5j */ + /* x2j = z4j + z3j */ + /* x3j = z6j + z1j */ + /* x4j = z6j -z1j */ + /* x5j = z4j -z3j */ + /* x6j = z2j -z5j */ + /* x7j = z0j -z7j */ + /*------------------------------------------------------------------*/ + i_macro = ((i_z0 + i_z7 + 32) >> 
6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + /* Change uc_recBuffer to Point to next element in the same column*/ + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = ((i_z2 + i_z5 + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = ((i_z4 + i_z3 + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = ((i_z6 + i_z1 + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = ((i_z6 - i_z1 + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = ((i_z4 - i_z3 + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = ((i_z2 - i_z5 + 32) >> 6); + 
pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = ((i_z0 - i_z7 + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + + pi2_tmp_ptr++; + pu1_out_ptr++; + pu1_pred++; + pi2_res++; + pi2_res_pred++; + } +} + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized resiude and + * prediction buffer + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed. + * This inverse transformed content is added to the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_src + * quantized 4x4 block + * + * @param[in] pu1_pred + * prediction 4x4 block + * + * @param[out] pu1_out + * reconstructed 4x4 block + * + * @param[in] src_strd + * quantization buffer stride + * + * @param[in] i4_pred_stride, + * Prediction buffer stride + * + * @param[in] i4_out_stride + * recon buffer Stride + * + * @param[in] pu2_scaling_list + * pointer to scaling list + * + * @param[in] pu2_norm_adjust + * pointer to inverse scale matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi4_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void isvc_iquant_itrans_recon_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, + WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 
u1_res_accumulate) +{ + WORD16 x0, x1, x2, x3, i; + WORD32 q0, q1, q2, q3; + WORD16 i_macro; + + WORD16 *pi2_src = ps_src->pv_data; + WORD16 *pi2_res = ps_res->pv_data; + WORD16 *pi2_res_pred = ps_res_pred->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + UWORD8 *pu1_out = ps_rec->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + WORD16 *pi2_src_ptr = pi2_src; + WORD16 *pi2_tmp_ptr = pi2_tmp; + UWORD8 *pu1_pred_ptr = pu1_pred; + WORD16 *pi2_res_ptr = pi2_res; + WORD16 *pi2_res_pred_ptr = pi2_res_pred; + UWORD8 *pu1_out_ptr = pu1_out; + WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 
1 << (3 - u4_qp_div_6) : 0; + + UNUSED(i4_iq_start_idx); + + /* inverse quant */ + /*horizontal inverse transform */ + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + if(i == 0) + { + q0 = pi2_dc_src[0]; + } + else + { + q0 = pi2_src_ptr[0]; + INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); + } + + q2 = pi2_src_ptr[2]; + INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4); + + x0 = q0 + q2; + x1 = q0 - q2; + + q1 = pi2_src_ptr[1]; + INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4); + + q3 = pi2_src_ptr[3]; + INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4); + + x2 = (q1 >> 1) - q3; + x3 = q1 + (q3 >> 1); + + pi2_tmp_ptr[0] = x0 + x3; + pi2_tmp_ptr[1] = x1 + x2; + pi2_tmp_ptr[2] = x1 - x2; + pi2_tmp_ptr[3] = x0 - x3; + + pi2_src_ptr += i4_src_stride; + pi2_tmp_ptr += SUB_BLK_WIDTH_4x4; + pu2_iscal_mat += SUB_BLK_WIDTH_4x4; + pu2_weigh_mat += SUB_BLK_WIDTH_4x4; + } + + /* vertical inverse transform */ + pi2_tmp_ptr = pi2_tmp; + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + pu1_pred_ptr = pu1_pred; + pi2_res_ptr = pi2_res; + pi2_res_pred_ptr = pi2_res_pred; + pu1_out = pu1_out_ptr; + + x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]); + x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]); + x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12]; + x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1); + + /* inverse prediction */ + i_macro = x0 + x3; + i_macro = ((i_macro + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = x1 + x2; + i_macro = ((i_macro + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += 
i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = x1 - x2; + i_macro = ((i_macro + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + i_macro = x0 - x3; + i_macro = ((i_macro + 32) >> 6); + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + + pi2_tmp_ptr++; + pu1_out_ptr += 2; // Interleaved store for output + pu1_pred += 2; // Interleaved load for pred buffer + pi2_res += 2; + pi2_res_pred += 2; + } +} + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized residue and + * prediction buffer if only dc value is present for residue + * + * @par Description: + * No inverse quantization is done in this function: the dc value is read + * from pi2_dc_src, rounded, and combined with the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_dc_src + * dc coefficient (pi2_src is not used by this function) + * + * @param[in] pu1_pred + * prediction 4x4 block in interleaved format + * + * @param[in] i4_pred_stride, + * Prediction buffer stride in interleaved format + * + * @param[in] i4_out_stride + * recon buffer Stride + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ + +void isvc_iquant_itrans_recon_chroma_4x4_dc(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, + WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD32 q0; + WORD16 
i_macro, i; + + WORD16 *pi2_src = ps_src->pv_data; + WORD16 *pi2_res = ps_res->pv_data; + WORD16 *pi2_res_pred = ps_res_pred->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + UWORD8 *pu1_out = ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + UWORD8 *pu1_pred_ptr = pu1_pred; + WORD16 *pi2_res_ptr = pi2_res; + WORD16 *pi2_res_pred_ptr = pi2_res_pred; + UWORD8 *pu1_out_ptr = pu1_out; + + UNUSED(pi2_src); + UNUSED(pu2_iscal_mat); + UNUSED(pu2_weigh_mat); + UNUSED(u4_qp_div_6); + UNUSED(pi2_tmp); + UNUSED(i4_iq_start_idx); + + q0 = pi2_dc_src[0]; // Restoring dc value for intra case3 + i_macro = ((q0 + 32) >> 6); + + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + pu1_pred_ptr = pu1_pred; + pi2_res_ptr = pi2_res; + pi2_res_pred_ptr = pi2_res_pred; + pu1_out = pu1_out_ptr; + + /* inverse prediction */ + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + pi2_res_pred_ptr += i4_res_pred_stride; + + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + pu1_pred_ptr += i4_pred_stride; + pu1_out += i4_out_stride; + pi2_res_ptr += i4_res_stride; + 
pi2_res_pred_ptr += i4_res_pred_stride; + + pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate); + *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]); + + pu1_out_ptr += 2; + pu1_pred += 2; + pi2_res += 2; + pi2_res_pred += 2; + } +} + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized residue and + * prediction buffer assuming cbf=0 + * + * @param[in] ps_src + * quantized 4x4 block + * + * @param[in] ps_pred + * prediction 4x4 block + * + * @param[in] ps_res + * residue 4x4 block + * + * @param[in] ps_res_pred + * residual pred 4x4 block + * + * @param[out] ps_out + * reconstructed 4x4 block + * + * @param[out] ps_iq_it_res_rec_constants + * reconstructed 4x4 block + * + * @param[out] pi2_tmp + * scratch buf + * + * @param[out] pi2_dc_src + * Pointer to dc coeff location + * + * @param[out] i4_iq_start_idx + * Idx of first coeff + * + * @param[in] pi2_tmp + * temporary buffer of size 1*16 + * + * @param[in] u1_res_accumulate + * Flag to control residual accumulation + * + * @returns none + * + ******************************************************************************* + */ +void isvc_zcbf_iquant_itrans_recon_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res_pred, buffer_container_t *ps_res, + buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, + WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, + UWORD8 u1_res_accumulate) +{ + WORD32 i, j; + + UWORD8 *pu1_out = ps_rec->pv_data; + WORD16 *pi2_res = ps_res->pv_data; + WORD16 *pi2_res_pred = ps_res_pred->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + + UNUSED(ps_src); + 
UNUSED(ps_iq_it_res_rec_constants); + UNUSED(pi2_tmp); + UNUSED(pi2_dc_src); + UNUSED(i4_iq_start_idx); + + if(u1_res_accumulate) + { + for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++) + { + for(j = 0; j < SUB_BLK_WIDTH_4x4; j++) + { + pi2_res[j + i * i4_res_stride] = isvc_get_residue( + 0, pi2_res_pred[j + i * i4_res_pred_stride], u1_res_accumulate); + pu1_out[j + i * i4_out_stride] = + CLIP3(0, UINT8_MAX, + pu1_pred[j + i * i4_pred_stride] + pi2_res[j + i * i4_res_stride]); + } + } + } + else + { + for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++) + { + for(j = 0; j < SUB_BLK_WIDTH_4x4; j++) + { + pi2_res[j + i * i4_res_stride] = 0; + pu1_out[j + i * i4_out_stride] = pu1_pred[j + i * i4_pred_stride]; + } + } + } +} + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized residue and + * prediction buffer assuming cbf=0 + * + * @param[in] ps_src + * quantized 4x4 block + * + * @param[in] ps_pred + * prediction 4x4 block + * + * @param[in] ps_res + * residue 4x4 block + * + * @param[in] ps_res_pred + * residual pred 4x4 block + * + * @param[out] ps_out + * reconstructed 4x4 block + * + * @param[out] ps_iq_it_res_rec_constants + * reconstructed 4x4 block + * + * @param[out] pi2_tmp + * scratch buf + * + * @param[out] pi2_dc_src + * Pointer to dc coeff location + * + * @param[out] i4_iq_start_idx + * Idx of first coeff + * + * @param[in] pi2_tmp + * temporary buffer of size 1*16 + * + * @param[in] u1_res_accumulate + * Flag to control residual accumulation + * + * @returns none + * + ******************************************************************************* + */ +void isvc_chroma_zcbf_iquant_itrans_recon_4x4( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, 
UWORD8 u1_res_accumulate) +{ + WORD32 i, j; + + UWORD8 *pu1_out = ps_rec->pv_data; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + WORD16 *pi2_res = ps_res->pv_data; + WORD16 *pi2_res_pred = ps_res_pred->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + + UNUSED(ps_src); + UNUSED(ps_iq_it_res_rec_constants); + UNUSED(pi2_tmp); + UNUSED(pi2_dc_src); + UNUSED(i4_iq_start_idx); + + if(u1_res_accumulate) + { + for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++) + { + for(j = 0; j < SUB_BLK_WIDTH_4x4 * 2; j += 2) + { + pi2_res[j + i * i4_res_stride] = isvc_get_residue( + 0, pi2_res_pred[j + i * i4_res_pred_stride], u1_res_accumulate); + pu1_out[j + i * i4_out_stride] = CLIP3( + 0, UINT8_MAX, + ((WORD16) pu1_pred[j + i * i4_pred_stride]) + pi2_res[j + i * i4_res_stride]); + } + } + } + else + { + for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++) + { + for(j = 0; j < SUB_BLK_WIDTH_4x4 * 2; j += 2) + { + pi2_res[j + i * i4_res_stride] = 0; + pu1_out[j + i * i4_out_stride] = pu1_pred[j + i * i4_pred_stride]; + } + } + } +} diff --git a/common/svc/isvc_macros.h b/common/svc/isvc_macros.h new file mode 100644 index 0000000..4bcab00 --- /dev/null +++ b/common/svc/isvc_macros.h @@ -0,0 +1,37 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
#ifndef _ISVC_MACROS_H_
#define _ISVC_MACROS_H_

/* Function qualifier: 'inline' combined with the always_inline attribute. */
/* The __attribute__ spelling is the GNU-style syntax (GCC/Clang).         */
#define FORCEINLINE __attribute__((always_inline)) inline

#endif
Ltd, Bangalore + */ +/** + ******************************************************************************* + * @file + * isvc_mem_fns.c + * + * @brief + * Functions used for memory operations + * + * @author + * Ittiam + * + * @par List of Functions: + * isvc_memcpy() + * isvc_memcpy_mul_8() + * isvc_memset() + * isvc_memset_mul_8() + * isvc_memset_16bit() + * isvc_memset_16bit_mul_8() + * isvc_memory_alloc() + * isvc_memory_free() + * + * @remarks + * None + * + ****************************************************************************** + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include +#include +#include +#include +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "isvc_mem_fns.h" + +/** +******************************************************************************** +* @brief copies a 2d blk from one location to another +* +* @param[out] pu1_dst : dst pointer +* +* @param[in] i4_dst_stride: stride of destination +* +* @param[in] pu1_src : src ptr +* +* @param[in] i4_src_stride: stride of src +* +* @param[in] i4_blk_wd : blk width +* +* @param[in] i4_blk_ht : blk height +* +* @return void +******************************************************************************** +*/ + +void isvc_copy_2d(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src, WORD32 i4_src_stride, + WORD32 i4_blk_wd, WORD32 i4_blk_ht) +{ + WORD32 i; + + for(i = 0; i < i4_blk_ht; i++) + { + memmove(pu1_dst, pu1_src, i4_blk_wd * sizeof(pu1_dst[0])); + + pu1_dst += i4_dst_stride; + pu1_src += i4_src_stride; + } +} + +/** +******************************************************************************** +* @brief memsets a 2d blk +* +* @param[out] pu1_dst : dst pointer +* +* @param[in] i4_dst_stride: stride of destination +* +* @param[in] i4_blk_wd : blk width +* +* @param[in] i4_blk_ht : 
blk height +* +* @return void +******************************************************************************** +*/ +void isvc_memset_2d(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd, + WORD32 i4_blk_ht) +{ + WORD32 i; + + for(i = 0; i < i4_blk_ht; i++) + { + memset(pu1_dst, u1_val, i4_blk_wd); + + pu1_dst += i4_dst_stride; + } +} + +/** + ******************************************************************************* + * + * @brief + * Function for copying to an interleaved destination + * + * @par Description: + * Copies the array of width 'wd' and height 'ht' from the location pointed + * by 'src' to the location pointed by 'dst' + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ht + * integer height of the array + * + * @param[in] wd + * integer width of the array + * + * @returns + * + * @remarks + * The alternate elements of src will be copied to alternate locations in dsr + * Other locations are not touched + * + ******************************************************************************* + */ +void isvc_interleaved_copy(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd, + WORD32 ht, WORD32 wd) +{ + WORD32 row, col; + wd *= 2; + + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col += 2) + { + pu1_dst[col] = pu1_src[col]; + } + + pu1_src += src_strd; + pu1_dst += dst_strd; + } +} + +/** + ******************************************************************************* + * + * @brief + * Function for copying to an interleaved destination + * + * @par Description: + * Copies the array of width 'wd' and height 'ht' from the location pointed + * by 'src' to the location pointed by 'dst' + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the 
destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ht + * integer height of the array + * + * @param[in] wd + * integer width of the array + * + * @returns + * + * @remarks + * The alternate elements of src will be copied to alternate locations in dsr + * Other locations are not touched + * + ******************************************************************************* + */ +void isvc_16bit_interleaved_copy(WORD16 *pi2_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 dst_strd, + WORD32 ht, WORD32 wd) +{ + WORD32 row, col; + wd *= 2; + + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col += 2) + { + pi2_dst[col] = pi2_src[col]; + } + + pi2_src += src_strd; + pi2_dst += dst_strd; + } +} + +/** + ******************************************************************************* + * + * @brief + * Function for memsetting to an interleaved destination + * + * @par Description: + * Memsets the array of width 'wd' and height 'ht' pointed by 'src' + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[in] src_strd + * integer source stride + * + * @param[in] value + * Value to set + * + * @param[in] ht + * integer height of the array + * + * @param[in] wd + * integer width of the array + * + * @returns + * + * @remarks + * The alternate elements of src will be copied to alternate locations in dsr + * Other locations are not touched + * + ******************************************************************************* + */ +void isvc_16bit_interleaved_memset(WORD16 *pi2_src, WORD32 i4_src_strd, WORD16 i2_value, + WORD32 i4_wd, WORD32 i4_ht) +{ + WORD32 row, col; + + i4_wd *= 2; + + for(row = 0; row < i4_ht; row++) + { + for(col = 0; col < i4_wd; col += 2) + { + pi2_src[col] = i2_value; + } + + pi2_src += i4_src_strd; + } +} + +/** + ******************************************************************************* + * + * @brief + * Checks if any pixel in a 
block is non-zero + * + * @param[in] pu1_data + * UWORD8 pointer to the block to be checked + * + * @param[in] i4_data_strd + * Stride of data buffer + * + * @param[in] u4_wd + * Width of the block + * + * @param[in] u4_ht + * Height of the block + * + ******************************************************************************* + */ +UWORD8 isvc_is_nonzero_blk(UWORD8 *pu1_data, WORD32 i4_data_strd, UWORD32 u4_wd, UWORD32 u4_ht) +{ + UWORD32 i, j; + + for(i = 0; i < u4_ht; i++) + { + for(j = 0; j < u4_wd; j++) + { + if(pu1_data[j + i * i4_data_strd]) + { + return 1; + } + } + } + + return 0; +} diff --git a/common/svc/isvc_mem_fns.h b/common/svc/isvc_mem_fns.h new file mode 100644 index 0000000..a4d95f7 --- /dev/null +++ b/common/svc/isvc_mem_fns.h @@ -0,0 +1,109 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
#ifndef _ISVC_MEM_FNS_H_
#define _ISVC_MEM_FNS_H_

#include "ih264_typedefs.h"

/* Function types shared by the C and platform-specific variants declared below */

/* Allocator taking a size */
typedef void *FT_MEM_ALLOC(UWORD32 u4_size);

/* Deallocator taking the pointer to release */
typedef void FT_MEM_FREE(void *pv_mem);

/* Linear copy of num_bytes bytes */
typedef void FT_MEMCPY(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);

/* 2-D byte block copy with independent source and destination strides */
typedef void FT_COPY_2D(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src,
                        WORD32 i4_src_stride, WORD32 i4_blk_wd, WORD32 i4_blk_ht);

/* 2-D byte block fill with a constant value */
typedef void FT_MEMSET_2D(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
                          WORD32 i4_blk_ht);

/* Linear fill of num_bytes bytes */
typedef void FT_MEMSET(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);

/* Linear fill of num_words 16-bit words */
typedef void FT_MEMSET_16BIT(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words);

/* Copy of 16-bit elements in interleaved layout; note the (ht, wd) order */
typedef void FT_16BIT_INTERLEAVED_COPY(WORD16 *pi2_src, WORD16 *pi2_dst, WORD32 src_strd,
                                       WORD32 dst_strd, WORD32 ht, WORD32 wd);

/* Fill of 16-bit elements in interleaved layout; note the (wd, ht) order */
typedef void FT_16BIT_INTERLEAVED_MEMSET(WORD16 *pi2_src, WORD32 i4_src_strd, WORD16 i2_value,
                                         WORD32 i4_wd, WORD32 i4_ht);

/* Predicate over a 2-D byte block, returning a UWORD8 flag */
typedef UWORD8 FT_NONZERO_CHECKER(UWORD8 *pu1_data, WORD32 i4_data_strd, UWORD32 u4_wd,
                                  UWORD32 u4_ht);

/* C function declarations */
extern FT_MEMCPY ih264_memcpy;
extern FT_MEMCPY ih264_memcpy_mul_8;
extern FT_MEMSET ih264_memset;
extern FT_MEMSET ih264_memset_mul_8;
extern FT_MEMSET_16BIT ih264_memset_16bit;
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8;
extern FT_COPY_2D isvc_copy_2d;
extern FT_MEMSET_2D isvc_memset_2d;
extern FT_16BIT_INTERLEAVED_COPY isvc_16bit_interleaved_copy;
extern FT_16BIT_INTERLEAVED_MEMSET isvc_16bit_interleaved_memset;
extern FT_NONZERO_CHECKER isvc_is_nonzero_blk;
extern FT_MEM_ALLOC isvc_memory_alloc;
extern FT_MEM_FREE isvc_memory_free;

/* A9 Q function declarations */
/* NOTE(review): this is the only ih264-style memcpy/memset variant declared */
/* with an isvc_ prefix; confirm a symbol of this name is actually defined   */
extern FT_MEMCPY isvc_memcpy_a9q;
extern FT_MEMCPY ih264_memcpy_mul_8_a9q;
extern FT_MEMSET ih264_memset_a9q;
extern FT_MEMSET ih264_memset_mul_8_a9q;
extern FT_MEMSET_16BIT ih264_memset_16bit_a9q;
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_a9q;

/* AV8 function declarations */
extern FT_MEMCPY ih264_memcpy_av8;
extern FT_MEMCPY ih264_memcpy_mul_8_av8;
extern FT_MEMSET ih264_memset_av8;
extern FT_MEMSET ih264_memset_mul_8_av8;
extern FT_MEMSET_16BIT ih264_memset_16bit_av8;
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_av8;

/* NEON function declarations */
extern FT_MEMSET_2D isvc_memset_2d_neon;

/* SSSE3 variants */
extern FT_MEMCPY ih264_memcpy_mul_8_ssse3;
extern FT_MEMSET ih264_memset_mul_8_ssse3;
extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_ssse3;
extern FT_COPY_2D isvc_copy_2d_ssse3;

/* SSE4.2 variants */
extern FT_MEMSET_2D isvc_memset_2d_sse42;

#endif
Ltd, Bangalore + */ +/** + ******************************************************************************* + * @file + * ih264_resi_trans_quant.c + * + * @brief + * Contains function definitions single stage forward transform for H.264 + * It will calculate the residue, do the cf and then do quantization + * + * @author + * Ittiam + * + * @par List of Functions: + * - ih264_resi_trans_quant_4x4() + * - ih264_resi_trans_quant_chroma_4x4 + * - ih264_hadamard_quant_4x4 + * - ih264_hadamard_quant_2x2_uv + * - ih264_resi_trans_quant_8x8 + * + * @remarks + ******************************************************************************* + */ +/* System include files */ +#include +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_macros.h" +#include "ih264_trans_macros.h" +#include "ih264_trans_data.h" +#include "ih264_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" + +static FORCEINLINE WORD16 isvc_subtract_upsampled_res(WORD16 i2_residue, WORD16 i2_upsampled_res) +{ + return (CLIP3(-((WORD16) UINT8_MAX), ((WORD16) UINT8_MAX), i2_residue - i2_upsampled_res)); +} + +/** + ******************************************************************************* + * + * @brief + * This function performs forward transform and quantization on a 4*4 block + * + * @par Description: + * The function accepts source buffer and estimation buffer. From these, it + * computes the residue. This is residue is then transformed and quantized. + * The transform and quantization are in placed computed. They use the residue + * buffer for this. 
+ * + * @param[in] pu1_src + * Pointer to source sub-block + * + * @param[in] pu1_pred + * Pointer to prediction sub-block + * + * @param[in] pi2_out + * Pointer to residual sub-block + * + * @param[in] i4_src_stride + * Source stride + * + * @param[in] i4_pred_stride + * Prediction stride + * + * @param[in] dst_strd + * Destination stride + * + * @param[in] u4_qbits + * QP_BITS_h264_4x4 + floor(QP/6) + * + * @param[in] pu2_threshold_matrix + * Pointer to Forward Quant Threshold Matrix + * + * @param[in] pu2_scale_matrix + * Pointer to Forward Quant Scale Matrix + * + * @param[in] u4_round_factor + * Quantization Round factor + * + * @param[out] pu1_nnz + * Total non-zero coefficients in the current sub-block + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void isvc_resi_trans_quant_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res, + resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz, + WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res) +{ + UWORD32 i; + WORD32 x0, x1, x2, x3, x4, x5, x6, x7; + WORD32 i4_value; + + UWORD8 *pu1_src = ps_src->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + WORD16 *pi2_out = ps_out->pv_data; + WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_upsampled_res_stride = ps_upsampled_res ? 
ps_upsampled_res->i4_data_stride : 0; + WORD16 *pi2_out_tmp = pi2_out; + UWORD32 u4_nonzero_coeff = 0; + const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + /* computing prediction error (residue) */ + x4 = pu1_src[0] - pu1_pred[0]; + x5 = pu1_src[1] - pu1_pred[1]; + x6 = pu1_src[2] - pu1_pred[2]; + x7 = pu1_src[3] - pu1_pred[3]; + + if(u1_use_upsampled_res) + { + x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]); + x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]); + x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]); + x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]); + } + + /* Horizontal transform */ + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + pi2_out_tmp[0] = x0 + x1; + pi2_out_tmp[1] = (x3 << 1) + x2; + pi2_out_tmp[2] = x0 - x1; + pi2_out_tmp[3] = x3 - (x2 << 1); + + /* pointing to next row; */ + pu1_src += i4_src_stride; + pu1_pred += i4_pred_stride; + pi2_out_tmp += 4; + pi2_upsampled_res += i4_upsampled_res_stride; + } + + pi2_out_tmp = pi2_out; + + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + /* Vertical transform and quantization */ + x4 = pi2_out_tmp[0]; + x5 = pi2_out_tmp[4]; + x6 = pi2_out_tmp[8]; + x7 = pi2_out_tmp[12]; + + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + /* quantization is done in place */ + + i4_value = x0 + x1; + + if(i == 0) + { + (*pi2_dc_out) = i4_value; + } + + FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[0] = i4_value; + + i4_value = (x3 << 1) + x2; + FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[4] = i4_value; + + i4_value = 
x0 - x1; + FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[8] = i4_value; + + i4_value = x3 - (x2 << 1); + FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor, + u4_qbits, u4_nonzero_coeff); + pi2_out_tmp[12] = i4_value; + + pi2_out_tmp++; + pu2_scale_matrix++; + pu2_threshold_matrix++; + } + + /* Return total nonzero coefficients in the current sub block */ + *pu1_nnz = u4_nonzero_coeff; +} + +/** + ******************************************************************************* + * + * @brief + * This function performs forward transform and quantization on a 4*4 chroma + *block with interleaved values + * + * @par Description: + * The function accepts source buffer and estimation buffer. From these, it + * computes the residue. This is residue is then transformed and quantized. + * The transform and quantization are in placed computed. They use the residue + * buffer for this. 
+ * + * @param[in] pu1_src + * Pointer to source sub-block + * + * @param[in] pu1_pred + * Pointer to prediction sub-block + * + * @param[in] pi2_out + * Pointer to residual sub-block + * + * @param[in] i4_src_stride + * Source stride + * + * @param[in] i4_pred_stride + * Prediction stride + * + * @param[in] dst_strd + * Destination stride + * + * @param[in] u4_qbits + * QP_BITS_h264_4x4 + floor(QP/6) + * + * @param[in] pu2_threshold_matrix + * Pointer to Forward Quant Threshold Matrix + * + * @param[in] pu2_scale_matrix + * Pointer to Forward Quant Scale Matrix + * + * @param[in] u4_round_factor + * Quantization Round factor + * + * @param[out] pu1_nnz + * Total non-zero coefficients in the current sub-block + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void isvc_resi_trans_quant_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_out, + buffer_container_t *ps_upsampled_res, + resi_trans_quant_constants_t *ps_quant_constants, + UWORD8 *pu1_nnz, WORD16 *pi2_dc_out, + UWORD8 u1_use_upsampled_res) +{ + UWORD32 i; + WORD32 x0, x1, x2, x3, x4, x5, x6, x7; + WORD32 i4_value; + + UWORD8 *pu1_src = ps_src->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + WORD16 *pi2_out = ps_out->pv_data; + WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_upsampled_res_stride = ps_upsampled_res ? 
ps_upsampled_res->i4_data_stride : 0; + WORD16 *pi2_out_tmp = pi2_out; + UWORD32 u4_nonzero_coeff = 0; + const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + /* computing prediction error (residue) */ + x4 = pu1_src[0] - pu1_pred[0]; + x5 = pu1_src[2] - pu1_pred[2]; + x6 = pu1_src[4] - pu1_pred[4]; + x7 = pu1_src[6] - pu1_pred[6]; + + if(u1_use_upsampled_res) + { + x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]); + x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]); + x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]); + x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]); + } + + /* Horizontal transform */ + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + pi2_out_tmp[0] = x0 + x1; + pi2_out_tmp[1] = (x3 << 1) + x2; + pi2_out_tmp[2] = x0 - x1; + pi2_out_tmp[3] = x3 - (x2 << 1); + + /* pointing to next row; */ + pu1_src += i4_src_stride; + pu1_pred += i4_pred_stride; + pi2_out_tmp += 4; + pi2_upsampled_res += i4_upsampled_res_stride; + } + pi2_out_tmp = pi2_out; + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + /* Vertical transform and quantization */ + x4 = pi2_out_tmp[0]; + x5 = pi2_out_tmp[4]; + x6 = pi2_out_tmp[8]; + x7 = pi2_out_tmp[12]; + + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + /* quantization is done in place */ + + i4_value = x0 + x1; + + if(i == 0) + { + *pi2_dc_out = i4_value; + } + + FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[0] = i4_value; + + i4_value = (x3 << 1) + x2; + FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[4] = i4_value; + + i4_value = x0 - 
x1; + FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[8] = i4_value; + + i4_value = x3 - (x2 << 1); + FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor, + u4_qbits, u4_nonzero_coeff); + pi2_out_tmp[12] = i4_value; + + pi2_out_tmp++; + pu2_scale_matrix++; + pu2_threshold_matrix++; + } + + /* Return total nonzero coefficients in the current sub block */ + *pu1_nnz = u4_nonzero_coeff; +} + +/** + ******************************************************************************* + * + * @brief + * This function performs forward hadamard transform and quantization on a 4*4 + *block + * + * @par Description: + * The function accepts source buffer and estimation buffer. From these, it + * computes the residue. This is residue is then transformed and quantized. + * The transform and quantization are in placed computed. They use the residue + * buffer for this. + * + * @param[in] pu1_src + * Pointer to source sub-block + * + * @param[in] pu1_pred + * Pointer to prediction sub-block + * + * @param[in] pi2_out + * Pointer to residual sub-block + * + * @param[in] i4_src_stride + * Source stride + * + * @param[in] i4_pred_stride + * Prediction stride + * + * @param[in] dst_strd + * Destination stride + * + * @param[in] u4_qbits + * QP_BITS_h264_4x4 + floor(QP/6) + * + * @param[in] pu2_threshold_matrix + * Pointer to Forward Quant Threshold Matrix + * + * @param[in] pu2_scale_matrix + * Pointer to Forward Quant Scale Matrix + * + * @param[in] u4_round_factor + * Quantization Round factor + * + * @param[out] pu1_nnz + * Total non-zero coefficients in the current sub-block + * + * @returns + * + * @remarks + * None + * + */ + +void isvc_hadamard_quant_4x4(WORD16 *pi2_src, WORD16 *pi2_dst, + resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz) +{ + WORD32 i; + WORD32 x0, x1, x2, x3, x4, x5, x6, x7, i4_value; + + const UWORD16 *pu2_scale_matrix = 
ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + + *pu1_nnz = 0; + + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + x4 = pi2_src[0]; + x5 = pi2_src[1]; + x6 = pi2_src[2]; + x7 = pi2_src[3]; + + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + pi2_dst[0] = x0 + x1; + pi2_dst[1] = x3 + x2; + pi2_dst[2] = x0 - x1; + pi2_dst[3] = x3 - x2; + + pi2_src += 4; + pi2_dst += 4; + } + + /* Vertical transform and quantization */ + pi2_dst -= SUB_BLK_WIDTH_4x4 << 2; + + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + x4 = pi2_dst[0]; + x5 = pi2_dst[4]; + x6 = pi2_dst[8]; + x7 = pi2_dst[12]; + + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + i4_value = (x0 + x1) >> 1; + FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[0]); + pi2_dst[0] = i4_value; + + i4_value = (x3 + x2) >> 1; + FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[0]); + pi2_dst[4] = i4_value; + + i4_value = (x0 - x1) >> 1; + FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[0]); + pi2_dst[8] = i4_value; + + i4_value = (x3 - x2) >> 1; + FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[0]); + pi2_dst[12] = i4_value; + + pi2_dst++; + } +} + +/** + ******************************************************************************* + * + * @brief + * This function performs forward hadamard transform and quantization on a 2*2 + *block for both U and V planes + * + * @par Description: + * The function accepts source buffer and estimation buffer. From these, it + * computes the residue. This is residue is then transformed and quantized. + * The transform and quantization are in placed computed. 
They use the residue + * buffer for this. + * + * @param[in] pu1_src + * Pointer to source sub-block + * + * @param[in] pu1_pred + * Pointer to prediction sub-block + * + * @param[in] pi2_out + * Pointer to residual sub-block + * + * @param[in] i4_src_stride + * Source stride + * + * @param[in] i4_pred_stride + * Prediction stride + * + * @param[in] dst_strd + * Destination stride + * + * @param[in] u4_qbits + * QP_BITS_h264_4x4 + floor(QP/6) + * + * @param[in] pu2_threshold_matrix + * Pointer to Forward Quant Threshold Matrix + * + * @param[in] pu2_scale_matrix + * Pointer to Forward Quant Scale Matrix + * + * @param[in] u4_round_factor + * Quantization Round factor + * + * @param[out] pu1_nnz + * Total non-zero coefficients in the current sub-block + * + * @returns + * + * @remarks + * NNZ for dc is populated at 0 and 5th position of pu1_nnz + * + */ + +void isvc_hadamard_quant_2x2_uv(WORD16 *pi2_src, WORD16 *pi2_dst, + resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz) +{ + WORD32 x0, x1, x2, x3, x4, x5, x6, x7; + WORD32 i4_value, plane; + + const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + + for(plane = 0; plane < 2; plane++) + { + pu1_nnz[plane] = 0; + + /* Horizontal transform */ + x4 = pi2_src[0]; + x5 = pi2_src[1]; + x6 = pi2_src[2]; + x7 = pi2_src[3]; + + x0 = x4 + x5; + x1 = x4 - x5; + x2 = x6 + x7; + x3 = x6 - x7; + + /* Vertical transform and quantization */ + i4_value = (x0 + x2); + FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[plane]); + pi2_dst[0] = i4_value; + + i4_value = (x0 - x2); + FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[plane]); + pi2_dst[2] = i4_value; + + i4_value = (x1 - x3); + 
FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[plane]); + pi2_dst[3] = i4_value; + + i4_value = (x1 + x3); + FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[plane]); + pi2_dst[1] = i4_value; + + pi2_dst += 4; + pi2_src += 4; + } +} + +/* + ******************************************************************************* + * + * @brief + * This function performs Single stage forward transform CF8 and quantization + *on 8*8 blocks for h.264 + * + * @par Description: + * Performs single stage 8x8 forward transform CF8 after calculating the + *residue. The result is then quantized + * + * @param[in] pu1_src + * Input 8x8 pixels + * + * @param[in] pu1_pred + * Input 8x8 pixels + * + * @param[in] pi1_out + * Output 8x8 pixels + * + * @param[in] u4_thresh + * Threshold under which the coeffs are not quantized + * + * @param[in] u4_qp_div + * QP/6 + * + * @param[in] u4_qp_rem + * QP%6 + * + * @param[in] u2_src_stride + * Source stride + * + * @param[in] i4_pred_stride + * stride for prediction buffer + * + * @param[in] dst_strd + * stride for destination buffer + * + * @param[in] pu4_quant_mat + * Pointer to the 4x4 quantization matrix + * + * @returns Void + * + * + ******************************************************************************* + */ +void isvc_resi_trans_quant_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res, + resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz, + WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res) +{ + UWORD32 i; + WORD32 a0, a1, a2, a3, a4, a5, a6, a7; + WORD32 r0, r1, r2, r3, r4, r5, r6, r7; + + UWORD8 *pu1_src = ps_src->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + WORD16 *pi2_out = ps_out->pv_data; + WORD16 *pi2_upsampled_res = ps_upsampled_res ? 
ps_upsampled_res->pv_data : NULL; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0; + WORD16 *pi2_out_tmp = pi2_out; + UWORD32 u4_nonzero_coeff = 0; + const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + + UNUSED(pi2_dc_out); + + /*Horizontal transform */ + /* we are going to use the a's and r's in a twisted way since */ + /*i dont want to declare more variables */ + for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i) + { + r0 = pu1_src[0]; + r0 -= pu1_pred[0]; + r1 = pu1_src[1]; + r1 -= pu1_pred[1]; + r2 = pu1_src[2]; + r2 -= pu1_pred[2]; + r3 = pu1_src[3]; + r3 -= pu1_pred[3]; + r4 = pu1_src[4]; + r4 -= pu1_pred[4]; + r5 = pu1_src[5]; + r5 -= pu1_pred[5]; + r6 = pu1_src[6]; + r6 -= pu1_pred[6]; + r7 = pu1_src[7]; + r7 -= pu1_pred[7]; + + if(u1_use_upsampled_res) + { + r0 = isvc_subtract_upsampled_res(r0, pi2_upsampled_res[0]); + r1 = isvc_subtract_upsampled_res(r1, pi2_upsampled_res[1]); + r2 = isvc_subtract_upsampled_res(r2, pi2_upsampled_res[2]); + r3 = isvc_subtract_upsampled_res(r3, pi2_upsampled_res[3]); + r4 = isvc_subtract_upsampled_res(r4, pi2_upsampled_res[4]); + r5 = isvc_subtract_upsampled_res(r5, pi2_upsampled_res[5]); + r6 = isvc_subtract_upsampled_res(r6, pi2_upsampled_res[6]); + r7 = isvc_subtract_upsampled_res(r7, pi2_upsampled_res[7]); + } + + a0 = r0 + r7; + a1 = r1 + r6; + a2 = r2 + r5; + a3 = r3 + r4; + + a4 = a0 + a3; + a5 = a1 + a2; + a6 = a0 - a3; + a7 = a1 - a2; + + pi2_out_tmp[0] = a4 + a5; + + pi2_out_tmp[2] = a6 + (a7 >> 1); + pi2_out_tmp[4] = a4 - a5; + pi2_out_tmp[6] = (a6 >> 1) - a7; + + a0 = r0 - r7; + a1 = r1 - r6; + a2 = r2 - r5; + a3 = r3 - r4; + + a4 = a1 + a2 + ((a0 >> 1) + a0); + a5 = 
a0 - a3 - ((a2 >> 1) + a2); + a6 = a0 + a3 - ((a1 >> 1) + a1); + a7 = a1 - a2 + ((a3 >> 1) + a3); + + pi2_out_tmp[1] = a4 + (a7 >> 2); + pi2_out_tmp[3] = a5 + (a6 >> 2); + pi2_out_tmp[5] = a6 - (a5 >> 2); + pi2_out_tmp[7] = (a4 >> 2) - a7; + + pu1_src += i4_src_stride; + pu1_pred += i4_pred_stride; + pi2_out_tmp += 8; + pi2_upsampled_res += i4_upsampled_res_stride; + } + + /*vertical transform and quant */ + + pi2_out_tmp = pi2_out; + + for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i) + { + r0 = pi2_out_tmp[0]; + r1 = pi2_out_tmp[8]; + r2 = pi2_out_tmp[16]; + r3 = pi2_out_tmp[24]; + r4 = pi2_out_tmp[32]; + r5 = pi2_out_tmp[40]; + r6 = pi2_out_tmp[48]; + r7 = pi2_out_tmp[56]; + + a0 = r0 + r7; + a1 = r1 + r6; + a2 = r2 + r5; + a3 = r3 + r4; + + a4 = a0 + a3; + a5 = a1 + a2; + a6 = a0 - a3; + a7 = a1 - a2; + + a0 = r0 - r7; + a1 = r1 - r6; + a2 = r2 - r5; + a3 = r3 - r4; + + r0 = a4 + a5; + r2 = a6 + (a7 >> 1); + r4 = a4 - a5; + r6 = (a6 >> 1) - a7; + + a4 = a1 + a2 + ((a0 >> 1) + a0); + a5 = a0 - a3 - ((a2 >> 1) + a2); + a6 = a0 + a3 - ((a1 >> 1) + a1); + a7 = a1 - a2 + ((a3 >> 1) + a3); + + r1 = a4 + (a7 >> 2); + r3 = a5 + (a6 >> 2); + r5 = a6 - (a5 >> 2); + r7 = (a4 >> 2) - a7; + + FWD_QUANT(r0, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[0] = r0; + + FWD_QUANT(r1, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[8] = r1; + + FWD_QUANT(r2, pu2_threshold_matrix[16], pu2_scale_matrix[16], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[16] = r2; + + FWD_QUANT(r3, pu2_threshold_matrix[24], pu2_scale_matrix[24], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[24] = r3; + + FWD_QUANT(r4, pu2_threshold_matrix[32], pu2_scale_matrix[32], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[32] = r4; + + FWD_QUANT(r5, pu2_threshold_matrix[40], pu2_scale_matrix[40], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + 
pi2_out_tmp[40] = r5; + + FWD_QUANT(r6, pu2_threshold_matrix[48], pu2_scale_matrix[48], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[48] = r6; + + FWD_QUANT(r7, pu2_threshold_matrix[56], pu2_scale_matrix[56], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[56] = r7; + + pi2_out_tmp++; + pu2_scale_matrix++; + pu2_threshold_matrix++; + } + /* Return total nonzero coefficients in the current sub block */ + *pu1_nnz = u4_nonzero_coeff; +} diff --git a/common/svc/isvc_structs.h b/common/svc/isvc_structs.h new file mode 100644 index 0000000..7bdad1e --- /dev/null +++ b/common/svc/isvc_structs.h @@ -0,0 +1,335 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvc_structs.h +* +* @brief +* Contains struct definitions used for SVC +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVC_STRUCTS_H_ +#define _ISVC_STRUCTS_H_ + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ih264_defs.h" +#include "ih264_structs.h" +#include "isvc_defs.h" + +typedef struct buffer_container_t +{ + void *pv_data; + + WORD32 i4_data_stride; + +} buffer_container_t; + +typedef struct yuv_buf_props_t +{ + buffer_container_t as_component_bufs[NUM_COMPONENTS]; + + IV_COLOR_FORMAT_T e_color_format; + + UWORD32 u4_width; + + UWORD32 u4_height; + + UWORD8 u1_bit_depth; +} yuv_buf_props_t; + +typedef struct nal_unit_header_t +{ + UWORD8 u1_nal_ref_idc; + + UWORD8 u1_nal_unit_type; +} nal_unit_header_t; + +typedef struct coordinates_t +{ + WORD32 i4_abscissa; + + WORD32 i4_ordinate; +} coordinates_t; + +typedef struct svc_au_buf_t +{ + /* Array of structs that contain properties of the buffers used for storing */ + yuv_buf_props_t *ps_layer_yuv_buf_props; + + /* Temporal ID */ + WORD8 i1_temporal_id; + + /* Num Spatial Layers */ + UWORD8 u1_num_spatial_layers; + + /* Resolution ratio between spatial layers */ + DOUBLE d_spatial_res_ratio; + + /* absolute value of POC */ + WORD32 i4_abs_poc; + + /* POC % MaxPicOrderCntLSB */ + WORD32 i4_poc_lsb; + + /* Lower 32 bits of time stamp */ + UWORD32 u4_timestamp_low; + + /* Higher 32 bits of time stamp */ + UWORD32 u4_timestamp_high; + + /* Is Pic used as refPic for future frames? 
*/ + WORD32 i4_used_as_ref; + + /* frame_num in the slice header */ + WORD32 i4_frame_num; + + /* + * 0: Top Field + * 1: Bottom Field + */ + WORD8 i1_field_type; + + /* buffer ID from frame buffer manager */ + WORD32 i4_buf_id; + +} svc_au_buf_t; + +typedef struct svc_nalu_ext_t +{ + nal_unit_header_t s_nalu_header; + + /* idr_flag */ + UWORD8 u1_idr_flag; + + /* priority_id (Range = [0, 63]) */ + UWORD8 u1_priority_id; + + /* no_inter_layer_pred_flag */ + UWORD8 u1_no_inter_layer_pred_flag; + + /* dependency_id (Range = [0, 7]) */ + UWORD8 u1_dependency_id; + + /* quality_id (Range = [0, 15]) */ + UWORD8 u1_quality_id; + + /* temporal_id (Range = [0, 7]) */ + UWORD8 u1_temporal_id; + + /* use_ref_base_pic_flag */ + UWORD8 u1_use_ref_base_pic_flag; + + /* discardable_flag */ + UWORD8 u1_discardable_flag; + + /* output_flag */ + UWORD8 u1_output_flag; + + /* reserved_three_2bits */ + UWORD8 u1_reserved_three_2bits; + +} svc_nalu_ext_t; + +typedef struct svc_vui_ext_t +{ + /* specifies the maximum layers in the SVC bitstream */ + UWORD32 u4_vui_ext_num_entries_minus1; + + /* specifies the dependency ID for each layer */ + UWORD8 u1_vui_ext_dependency_id[MAX_VUI_EXT_NUM_ENTRIES]; + + /* specifies the quality ID for each layer */ + UWORD8 u1_vui_ext_quality_id[MAX_VUI_EXT_NUM_ENTRIES]; + + /* specifies the temporal ID for each layer */ + UWORD8 u1_vui_ext_temporal_id[MAX_VUI_EXT_NUM_ENTRIES]; + + /* specifies the timing_info_present_flag value of the i-th sub-bitstream */ + UWORD8 u1_vui_ext_timing_info_present_flag[MAX_VUI_EXT_NUM_ENTRIES]; + + /* specifies the num_units_in_tick value of the i-th sub-bitstream */ + UWORD32 u4_vui_ext_num_units_in_tick[MAX_VUI_EXT_NUM_ENTRIES]; + + /* specifies the time_scale value of the i-th sub-bitstream */ + UWORD32 u4_vui_ext_time_scale[MAX_VUI_EXT_NUM_ENTRIES]; + + /* specifies the fixed_frame_rate_flag value of the i-th sub-bitstream */ + UWORD8 u1_vui_ext_fixed_frame_rate_flag[MAX_VUI_EXT_NUM_ENTRIES]; + + /* specifies the 
nal_hrd_parameters_present_flag value of the i-th */ + UWORD8 u1_vui_ext_nal_hrd_params_present_flag[MAX_VUI_EXT_NUM_ENTRIES]; + + /* specifies the vcl_hrd_parameters_present_flag value of the i-th */ + UWORD8 u1_vui_ext_vcl_hrd_params_present_flag[MAX_VUI_EXT_NUM_ENTRIES]; + + /* specifies the low_delay_hrd_flag value of the i-th sub-bitstream */ + UWORD8 u1_vui_ext_low_delay_hrd_flag[MAX_VUI_EXT_NUM_ENTRIES]; + + /* specifies the pic_struct_present_flag value of the i-th sub-bitstream */ + UWORD8 u1_vui_ext_pic_struct_present_flag[MAX_VUI_EXT_NUM_ENTRIES]; + +} svc_vui_ext_t; + +typedef struct sps_svc_ext_t +{ + /* inter_layer_deblocking_filter_control_present_flag */ + UWORD8 u1_inter_layer_deblocking_filter_control_present_flag; + + /* extended_spatial_scalability_idc */ + UWORD8 u1_extended_spatial_scalability_idc; + + /* chroma_phase_x_plus1_flag */ + UWORD8 u1_chroma_phase_x_plus1; + + /* chroma_phase_y_plus1 */ + UWORD8 u1_chroma_phase_y_plus1; + + /* seq_ref_layer_chroma_phase_x_plus1_flag */ + UWORD8 u1_seq_ref_layer_chroma_phase_x_plus1_flag; + + /* seq_ref_layer_chroma_phase_y_plus1 */ + UWORD8 u1_seq_ref_layer_chroma_phase_y_plus1; + + /* seq_scaled_ref_layer_left_offset */ + WORD32 i4_seq_scaled_ref_layer_left_offset; + + /* seq_scaled_ref_layer_top_offset */ + WORD32 i4_seq_scaled_ref_layer_top_offset; + + /* seq_scaled_ref_layer_right_offset */ + WORD32 i4_seq_scaled_ref_layer_right_offset; + + /* seq_scaled_ref_layer_bottom_offset */ + WORD32 i4_seq_scaled_ref_layer_bottom_offset; + + /* seq_tcoeff_level_prediction_flag */ + WORD8 i1_seq_tcoeff_level_prediction_flag; + + /* adaptive_tcoeff_level_prediction_flag */ + WORD8 i1_adaptive_tcoeff_level_prediction_flag; + + /* slice_header_restriction_flag */ + WORD8 i1_slice_header_restriction_flag; + +} sps_svc_ext_t; + +typedef struct subset_sps_t +{ + /* SPS structure */ + sps_t s_sps; + + /* Structure containing flags specific to SVC SPS */ + sps_svc_ext_t s_sps_svc_ext; + + /* 
svc_vui_parameters_present_flag */ + WORD8 i1_svc_vui_parameters_present_flag; + + svc_vui_ext_t s_svc_vui; + + /* additional_extension2_data_flag */ + WORD8 i1_additional_extension2_flag; + +} subset_sps_t; + +typedef struct svc_slice_header_t +{ + /* ref_layer_dq_id */ + UWORD32 u4_ref_layer_dq_id; + + /* disable_inter_layer_deblocking_filter_idc */ + UWORD32 u4_disable_inter_layer_deblocking_filter_idc; + + /* inter_layer_slice_alpha_c0_offset_div2 */ + WORD32 i4_inter_layer_slice_alpha_c0_offset_div2; + + /* inter_layer_slice_beta_offset_div2 */ + WORD32 i4_inter_layer_slice_beta_offset_div2; + + /* constrained_intra_resampling_flag */ + WORD8 i1_constrained_intra_resampling_flag; + + /* ref_layer_chroma_phase_x_plus1_flag */ + WORD8 i1_ref_layer_chroma_phase_x_plus1_flag; + + /* ref_layer_chroma_phase_y_plus1 */ + WORD8 i1_ref_layer_chroma_phase_y_plus1; + + /* scaled_ref_layer_left_offset */ + WORD32 i4_scaled_ref_layer_left; + + /* scaled_ref_layer_top_offset */ + WORD32 i4_scaled_ref_layer_top; + + /* scaled_ref_layer_right_offset */ + WORD32 i4_scaled_ref_layer_right; + + /* scaled_ref_layer_bottom_offset */ + WORD32 i4_scaled_ref_layer_bottom; + + /* slice_skip_flag */ + WORD8 i1_slice_skip_flag; + + /* num_mbs_in_slice_minus1 */ + UWORD32 u4_num_mbs_in_slice_minus1; + + /* adaptive_base_mode_flag */ + WORD8 i1_adaptive_base_mode_flag; + + /* default_base_mode_flag */ + WORD8 i1_default_base_mode_flag; + + /* adaptive_motion_prediction_flag */ + WORD8 i1_adaptive_motion_prediction_flag; + + /* default_motion_prediction_flag */ + WORD8 i1_default_motion_prediction_flag; + + /* adaptive_residual_prediction_flag */ + WORD8 i1_adaptive_residual_prediction_flag; + + /* default_residual_prediction_flag */ + WORD8 i1_default_residual_prediction_flag; + + /* tcoeff_level_prediction_flag */ + WORD8 i1_tcoeff_level_prediction_flag; + + /* scan_idx_start */ + UWORD32 u4_scan_idx_start; + + /* scan_idx_end */ + UWORD32 u4_scan_idx_end; + + WORD32 
i4_store_ref_base_pic_flag; + + slice_header_t s_slice_header; +} svc_slice_header_t; + +#endif diff --git a/common/svc/isvc_trans_quant_itrans_iquant.h b/common/svc/isvc_trans_quant_itrans_iquant.h new file mode 100644 index 0000000..fd15dcc --- /dev/null +++ b/common/svc/isvc_trans_quant_itrans_iquant.h @@ -0,0 +1,253 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +/** + ******************************************************************************* + * @file + * isvc_trans_quant_itrans_iquant.h + * + * @brief + * Contains declarations for forward and inverse transform paths for H264 + * + * @author + * Ittiam + * + * @remarks + * + ******************************************************************************* + */ + +#ifndef _ISVC_TRANS_QUANT_ITRANS_IQUANT_H_ +#define _ISVC_TRANS_QUANT_ITRANS_IQUANT_H_ + +#include <stdint.h> + +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "ih264_macros.h" +#include "isvc_macros.h" +#include "isvc_structs.h" + +/* With and without residual_pred use */ +#define NUM_RESI_TRANS_QUANT_VARIANTS 2 + +#define NUM_IQ_IT_RECON_VARIANTS 3 + +/* Structs */ +typedef struct resi_trans_quant_constants_t +{ + const UWORD16 *pu2_scale_matrix; + + const UWORD16 *pu2_threshold_matrix; + + UWORD32 u4_qbits; + + UWORD32 u4_round_factor; +} resi_trans_quant_constants_t; + +typedef struct iq_it_res_rec_constants_t +{ + const UWORD16 *pu2_iscal_mat; + + const UWORD16 *pu2_weigh_mat; + + UWORD32 u4_qp_div_6; +} iq_it_res_rec_constants_t; + +/* Typedefs */ +typedef void FT_RESI_TRANS_DCTRANS_QUANT(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, + WORD32 src_strd, WORD32 pred_strd, WORD32 dst_strd, + const UWORD16 *pu2_scale_mat, + const UWORD16 *pu2_thresh_mat, UWORD32 u4_qbit, + UWORD32 u4_round_fact, UWORD8 *pu1_nnz); + +typedef void FT_IDCTRANS_IQUANT_ITRANS_RECON(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, + WORD32 src_strd, WORD32 pred_strd, WORD32 out_strd, + const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, + UWORD32 pi4_cntrl, WORD32 *pi4_tmp); + +typedef void FT_RESI_TRANS_QUANT(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res, + resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz, + WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res); + +typedef void 
FT_LUMA_16X16_RESI_TRANS_DCTRANS_QUANT( + UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd, + WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag); + +typedef void FT_CHROMA_8X8_RESI_TRANS_DCTRANS_QUANT( + UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd, + WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 *pu1_nnz); + +typedef void FT_IQ_IT_RECON(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res_pred, buffer_container_t *ps_res, + buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, + WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate); + +typedef void FT_LUMA_16X16_IDCTRANS_IQUANT_ITRANS_RECON( + WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd, + WORD32 out_strd, const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, + UWORD32 pi4_cntrl, UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp); + +typedef void FT_CHROMA_8X8_IDCTRANS_IQUANT_ITRANS_RECON( + WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd, + WORD32 out_strd, const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, + UWORD32 pi4_cntrl, WORD32 *pi4_tmp); + +typedef void FT_IHADAMARD_SCALING(WORD16 *pi2_src, WORD16 *pi2_out, const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6, + WORD32 *pi4_tmp); + +typedef void FT_HADAMARD_QUANT(WORD16 *pi2_src, WORD16 *pi2_dst, + resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz); + +/*****************************************************************************/ +/* Extern Function Declarations */ 
+/*****************************************************************************/ + +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4; +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4; +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_8x8; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc; +extern FT_IQ_IT_RECON isvc_zcbf_iquant_itrans_recon_4x4; +extern FT_IQ_IT_RECON isvc_chroma_zcbf_iquant_itrans_recon_4x4; +extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4; +extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv; +extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4; +extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv; + +/* A9 Declarations */ +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_a9; +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_a9; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_a9; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_a9; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_a9; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_a9; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_a9; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_a9; +extern FT_LUMA_16X16_RESI_TRANS_DCTRANS_QUANT isvc_luma_16x16_resi_trans_dctrans_quant_a9; +extern FT_CHROMA_8X8_RESI_TRANS_DCTRANS_QUANT isvc_chroma_8x8_resi_trans_dctrans_quant_a9; +extern FT_LUMA_16X16_IDCTRANS_IQUANT_ITRANS_RECON isvc_luma_16x16_idctrans_iquant_itrans_recon_a9; +extern FT_CHROMA_8X8_IDCTRANS_IQUANT_ITRANS_RECON isvc_chroma_8x8_idctrans_iquant_itrans_recon_a9; +extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_a9; +extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_a9; +extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4_a9; +extern FT_HADAMARD_QUANT 
isvc_hadamard_quant_2x2_uv_a9; + +/* Av8 Declarations */ +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_av8; +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_av8; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_av8; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_av8; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_av8; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_av8; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_av8; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_av8; +extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_av8; +extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_av8; + +/* NEON Declarations */ +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_neon; +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_with_residual_sub_neon; +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_neon; +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon; + +/* SSSE3 Declarations */ +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_ssse3; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_ssse3; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_ssse3; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_ssse3; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3; +extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_ssse3; +extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_ssse3; + +/* SSE4.2 Declarations */ +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_sse42; +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_with_res_pred_sse42; +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_sse42; +extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_with_res_pred_sse42; + +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_sse42; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_4x4_sse42; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_4x4_with_res_acc_sse42; + +extern 
FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_sse42; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_sse42; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_with_res_acc_sse42; + +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_dc_4x4_sse42; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_dc_4x4_sse42; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42; + +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_sse42; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_dc_sse42; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_dc_with_res_acc_sse42; + +extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_sse42; + +extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4_sse42; +extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv_sse42; + +/* NEON Declarations */ +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_neon; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_with_res_output_neon; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon; + +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_neon; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon; + +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_neon; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon; + +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_neon; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon; +extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon; + +static FORCEINLINE UWORD8 isvc_get_resi_trans_quant_variant_idx(UWORD8 u1_use_upsampled_res) +{ + return u1_use_upsampled_res; +} + +static FORCEINLINE UWORD8 isvc_get_iq_it_recon_variant_idx(UWORD8 u1_is_intra, + UWORD8 
u1_res_accumulate) +{ + ASSERT(!((1 == u1_is_intra) && (1 == u1_res_accumulate))); + + return u1_is_intra * 2 + u1_res_accumulate; +} + +static FORCEINLINE WORD16 isvc_get_residue(WORD16 i2_it_out, WORD16 i2_res_pred, + UWORD8 u1_res_accumulate) +{ + return (u1_res_accumulate + ? (CLIP3(-((WORD16) UINT8_MAX), ((WORD16) UINT8_MAX), i2_it_out + i2_res_pred)) + : (CLIP3(-((WORD16) UINT8_MAX), ((WORD16) UINT8_MAX), i2_it_out))); +} + +#endif diff --git a/common/svccommon.cmake b/common/svccommon.cmake new file mode 100644 index 0000000..89785db --- /dev/null +++ b/common/svccommon.cmake @@ -0,0 +1,39 @@ +# src files +list( + APPEND + LIBAVC_COMMON_SRCS + "${AVC_ROOT}/common/svc/isvc_common_tables.c" + "${AVC_ROOT}/common/svc/isvc_cabac_tables.c" + "${AVC_ROOT}/common/svc/isvc_intra_resample.c" + "${AVC_ROOT}/common/svc/isvc_iquant_itrans_recon.c" + "${AVC_ROOT}/common/svc/isvc_mem_fns.c" + "${AVC_ROOT}/common/svc/isvc_resi_trans_quant.c") + +include_directories(${AVC_ROOT}/common/svc) + +# arm/x86 sources +if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR + "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch32") + list( + APPEND + LIBAVC_COMMON_ASMS + "${AVC_ROOT}/common/arm/svc/isvc_intra_sampling_neon.c" + "${AVC_ROOT}/common/arm/svc/isvc_iquant_itrans_recon_neon.c" + "${AVC_ROOT}/common/arm/svc/isvc_mem_fns_neon.c" + "${AVC_ROOT}/common/arm/svc/isvc_resi_trans_quant_neon.c") + include_directories(${AVC_ROOT}/common/arm/svc) +else() + list( + APPEND + LIBAVC_COMMON_SRCS + "${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c" + "${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_sse42.c" + "${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_ssse3.c" + "${AVC_ROOT}/common/x86/svc/isvc_mem_fns_sse42.c" + "${AVC_ROOT}/common/x86/svc/isvc_mem_fns_ssse3.c" + "${AVC_ROOT}/common/x86/svc/isvc_padding_ssse3.c" + "${AVC_ROOT}/common/x86/svc/isvc_resi_trans_quant_sse42.c" + "${AVC_ROOT}/common/x86/svc/isvc_intra_resample_sse42.c") + + 
include_directories(${AVC_ROOT}/common/x86/svc) +endif() diff --git a/common/x86/svc/isvc_intra_resample_sse42.c b/common/x86/svc/isvc_intra_resample_sse42.c new file mode 100644 index 0000000..cc790da --- /dev/null +++ b/common/x86/svc/isvc_intra_resample_sse42.c @@ -0,0 +1,658 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/*! 
+ ************************************************************************** + + * * \file isvc_intra_resample_sse42.c + * + * \brief + * Contains SSE4.2 routines that + * interpolate base layer samples for dyadic SVC intra resampling + * + * Detailed_description + * + * \date + * + * + * + * \author + + * ************************************************************************** + + */ +#include <immintrin.h> + +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "isvc_intra_resample.h" + +void isvc_interpolate_base_luma_dyadic_sse42(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf, + UWORD8 *pu1_out_buf, WORD32 i4_out_stride) +{ + WORD32 i4_y; + WORD32 i4_filt_stride, i4_src_stride; + UWORD8 *pu1_inp, *pu1_out; + WORD16 *pi2_tmp; + + __m128i i4_samp_16x8b_0, i4_samp_16x8b_1, i4_samp_16x8b_2, i4_samp_16x8b_3; + __m128i i4_samp_8x16b_0, i4_samp_8x16b_1, i4_samp_8x16b_2, i4_samp_8x16b_3; + __m128i i4_res_8x16b_r1_1, i4_res_8x16b_r1_2, i4_res_8x16b_r1_3; + __m128i i4_res_8x16b_r2_1, i4_res_8x16b_r2_2, i4_res_8x16b_r2_3; + + /* Filter coefficient values for phase 4 */ + __m128i i4_coeff_8x16b_0 = _mm_set1_epi16(-3); + __m128i i4_coeff_8x16b_1 = _mm_set1_epi16(28); + i4_filt_stride = 12; + i4_src_stride = DYADIC_REF_W_Y; + + /* Initializing pointers */ + pu1_inp = pu1_inp_buf; + pi2_tmp = pi2_tmp_filt_buf; + pu1_out = pu1_out_buf; + + /* Vertical interpolation */ + /* First 8 columns (64 bits of input per row) */ + /* y = 0, y_phase = 12 */ + i4_samp_16x8b_0 = _mm_loadl_epi64((__m128i *) (pu1_inp)); + i4_samp_16x8b_1 = _mm_loadl_epi64((__m128i *) (pu1_inp + i4_src_stride)); + i4_samp_16x8b_2 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1))); + i4_samp_16x8b_3 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride)); + pu1_inp += (i4_src_stride << 2); + i4_samp_8x16b_0 = _mm_cvtepu8_epi16(i4_samp_16x8b_0); + i4_samp_8x16b_1 = _mm_cvtepu8_epi16(i4_samp_16x8b_1); + i4_samp_8x16b_2 = _mm_cvtepu8_epi16(i4_samp_16x8b_2); + i4_samp_8x16b_3 = _mm_cvtepu8_epi16(i4_samp_16x8b_3); + + /* since y_phase 12 for y = 0 */ +
/*Multiply by 8 => left shift by 3*/ + i4_res_8x16b_r1_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3); + i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1); + i4_res_8x16b_r1_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0); + + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2); + i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_0); + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3); + + _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1); + pi2_tmp += i4_filt_stride; + + for(i4_y = 1; i4_y < 15; i4_y += 2) + { + i4_samp_8x16b_0 = i4_samp_8x16b_1; + i4_samp_8x16b_1 = i4_samp_8x16b_2; + i4_samp_8x16b_2 = i4_samp_8x16b_3; + i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp))); + + /* y_phase is 4 for odd values of y */ + /* and 12 for even values of y */ + //*Multiply by 8 => left shift by 3*/ + i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0); + i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1); + i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3); + + i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3); + i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1); + i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0); + + i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3); + i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0); + + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2); + i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2); + + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3); + i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3); + + /* Storing the results */ + _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1); + _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1); + pi2_tmp += (i4_filt_stride << 1); + 
pu1_inp += i4_src_stride; + + } /* End of loop over y */ + + /* y = 15, y_phase = 4 */ + i4_samp_8x16b_0 = i4_samp_8x16b_1; + i4_samp_8x16b_1 = i4_samp_8x16b_2; + i4_samp_8x16b_2 = i4_samp_8x16b_3; + i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp))); + + i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0); + i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1); + i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3); + i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3); + + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2); + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3); + + /* Store the output */ + _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1); + + /* Reinitializing the ptrs */ + pu1_inp = pu1_inp_buf; + pi2_tmp = pi2_tmp_filt_buf; + + /*Remaining 32 bit */ + pu1_inp += 8; + pi2_tmp += 8; + + /* y = 0, y_phase = 12 */ + i4_samp_16x8b_0 = _mm_loadl_epi64((__m128i *) (pu1_inp)); + i4_samp_16x8b_1 = _mm_loadl_epi64((__m128i *) (pu1_inp + i4_src_stride)); + i4_samp_16x8b_2 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1))); + i4_samp_16x8b_3 = + _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride)); + pu1_inp += (i4_src_stride << 2); + i4_samp_8x16b_0 = _mm_cvtepu8_epi16(i4_samp_16x8b_0); + i4_samp_8x16b_1 = _mm_cvtepu8_epi16(i4_samp_16x8b_1); + i4_samp_8x16b_2 = _mm_cvtepu8_epi16(i4_samp_16x8b_2); + i4_samp_8x16b_3 = _mm_cvtepu8_epi16(i4_samp_16x8b_3); + + /* since y_phase 12 for y = 0 */ + /*Multiply by 8 => left shift by 3*/ + i4_res_8x16b_r1_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3); + i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1); + i4_res_8x16b_r1_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0); + + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2); + i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_0); + 
i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3); + + _mm_storel_epi64((__m128i *) pi2_tmp, i4_res_8x16b_r1_1); + pi2_tmp += i4_filt_stride; + + for(i4_y = 1; i4_y < 15; i4_y += 2) + { + i4_samp_8x16b_0 = i4_samp_8x16b_1; + i4_samp_8x16b_1 = i4_samp_8x16b_2; + i4_samp_8x16b_2 = i4_samp_8x16b_3; + i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp))); + + /* y_phase is 4 for odd values of y */ + /* and 12 for even values of y */ + //*Multiply by 8 => left shift by 3*/ + i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0); + i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1); + i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3); + + i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3); + i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1); + i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0); + + i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3); + i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0); + + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2); + i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2); + + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3); + i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3); + + /* Storing the results */ + _mm_storel_epi64((__m128i *) pi2_tmp, i4_res_8x16b_r1_1); + _mm_storel_epi64((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1); + pi2_tmp += (i4_filt_stride << 1); + pu1_inp += i4_src_stride; + + } /* End of loop over y */ + + /* y = 15, y_phase = 4 */ + i4_samp_8x16b_0 = i4_samp_8x16b_1; + i4_samp_8x16b_1 = i4_samp_8x16b_2; + i4_samp_8x16b_2 = i4_samp_8x16b_3; + i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp))); + + i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0); + i4_res_8x16b_r1_2 = 
_mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1); + i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3); + i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3); + + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2); + i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3); + + /* Store the output */ + _mm_storel_epi64((__m128i *) pi2_tmp, i4_res_8x16b_r1_1); + + /* Reinitializing the ptrs */ + pu1_inp = pu1_inp_buf; + pi2_tmp = pi2_tmp_filt_buf; + + { + __m128i coeff_c0_c1_8x16b = _mm_set_epi16(28, -3, 28, -3, 28, -3, 28, -3); + __m128i coeff_c2_c3_8x16b = _mm_set_epi16(-1, 8, -1, 8, -1, 8, -1, 8); + __m128i coeff_c3_c2_8x16b = _mm_set_epi16(8, -1, 8, -1, 8, -1, 8, -1); + __m128i coeff_c1_c0_8x16b = _mm_set_epi16(-3, 28, -3, 28, -3, 28, -3, 28); + + __m128i i4_samp_8x16b_rpart1_0, i4_samp_8x16b_rpart2_0; + __m128i i4_samp_8x16b_rpart1_1, i4_samp_8x16b_rpart2_1; + __m128i i4_samp_8x16b_rpart1_2, i4_samp_8x16b_rpart2_2; + __m128i i4_samp_8x16b_rpart1_3, i4_samp_8x16b_rpart2_3; + __m128i i4_samp_8x16b_rpart1_4, i4_samp_8x16b_rpart2_4; + + __m128i i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart2_0; + __m128i i4_res_4x32b_rpart1_1, i4_res_4x32b_rpart2_1; + __m128i i4_res_4x32b_rpart1_2, i4_res_4x32b_rpart2_2; + __m128i i4_res_4x32b_rpart1_3, i4_res_4x32b_rpart2_3; + + __m128i res_512 = _mm_set1_epi32(512); + /* Horizontal interpolation */ + for(i4_y = 0; i4_y < 16; i4_y++) + { + i4_samp_8x16b_rpart1_0 = _mm_loadu_si128((__m128i *) pi2_tmp); + i4_samp_8x16b_rpart2_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 4)); + + i4_samp_8x16b_rpart1_1 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 2); + i4_samp_8x16b_rpart1_2 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 4); + i4_samp_8x16b_rpart1_3 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 6); + i4_samp_8x16b_rpart1_4 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 8); + + i4_samp_8x16b_rpart2_1 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 2); + i4_samp_8x16b_rpart2_2 = 
_mm_srli_si128(i4_samp_8x16b_rpart2_0, 4); + i4_samp_8x16b_rpart2_3 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 6); + i4_samp_8x16b_rpart2_4 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 8); + + i4_samp_8x16b_rpart1_0 = + _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_0, i4_samp_8x16b_rpart1_1); + i4_samp_8x16b_rpart1_1 = + _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_1, i4_samp_8x16b_rpart1_2); + i4_samp_8x16b_rpart1_2 = + _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_2, i4_samp_8x16b_rpart1_3); + i4_samp_8x16b_rpart1_3 = + _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_3, i4_samp_8x16b_rpart1_4); + + i4_samp_8x16b_rpart2_0 = + _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_0, i4_samp_8x16b_rpart2_1); + i4_samp_8x16b_rpart2_1 = + _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_1, i4_samp_8x16b_rpart2_2); + i4_samp_8x16b_rpart2_2 = + _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_2, i4_samp_8x16b_rpart2_3); + i4_samp_8x16b_rpart2_3 = + _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_3, i4_samp_8x16b_rpart2_4); + + i4_res_4x32b_rpart1_0 = _mm_madd_epi16(i4_samp_8x16b_rpart1_0, coeff_c3_c2_8x16b); + i4_res_4x32b_rpart1_2 = _mm_madd_epi16(i4_samp_8x16b_rpart1_2, coeff_c1_c0_8x16b); + + i4_res_4x32b_rpart1_1 = _mm_madd_epi16(i4_samp_8x16b_rpart1_1, coeff_c0_c1_8x16b); + i4_res_4x32b_rpart1_3 = _mm_madd_epi16(i4_samp_8x16b_rpart1_3, coeff_c2_c3_8x16b); + + i4_res_4x32b_rpart2_0 = _mm_madd_epi16(i4_samp_8x16b_rpart2_0, coeff_c3_c2_8x16b); + i4_res_4x32b_rpart2_2 = _mm_madd_epi16(i4_samp_8x16b_rpart2_2, coeff_c1_c0_8x16b); + + i4_res_4x32b_rpart2_1 = _mm_madd_epi16(i4_samp_8x16b_rpart2_1, coeff_c0_c1_8x16b); + i4_res_4x32b_rpart2_3 = _mm_madd_epi16(i4_samp_8x16b_rpart2_3, coeff_c2_c3_8x16b); + + i4_res_4x32b_rpart1_0 = _mm_add_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_2); + i4_res_4x32b_rpart1_1 = _mm_add_epi32(i4_res_4x32b_rpart1_1, i4_res_4x32b_rpart1_3); + + i4_res_4x32b_rpart2_0 = _mm_add_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_2); + i4_res_4x32b_rpart2_1 = _mm_add_epi32(i4_res_4x32b_rpart2_1, 
i4_res_4x32b_rpart2_3); + + i4_res_4x32b_rpart1_2 = + _mm_unpacklo_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_1); + i4_res_4x32b_rpart1_3 = + _mm_unpackhi_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_1); + + i4_res_4x32b_rpart2_2 = + _mm_unpacklo_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_1); + i4_res_4x32b_rpart2_3 = + _mm_unpackhi_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_1); + + i4_res_4x32b_rpart1_0 = _mm_add_epi32(i4_res_4x32b_rpart1_2, res_512); + i4_res_4x32b_rpart1_1 = _mm_add_epi32(i4_res_4x32b_rpart1_3, res_512); + + i4_res_4x32b_rpart1_0 = _mm_srai_epi32(i4_res_4x32b_rpart1_0, 10); + i4_res_4x32b_rpart1_1 = _mm_srai_epi32(i4_res_4x32b_rpart1_1, 10); + + i4_res_4x32b_rpart2_0 = _mm_add_epi32(i4_res_4x32b_rpart2_2, res_512); + i4_res_4x32b_rpart2_1 = _mm_add_epi32(i4_res_4x32b_rpart2_3, res_512); + + i4_res_4x32b_rpart2_0 = _mm_srai_epi32(i4_res_4x32b_rpart2_0, 10); + i4_res_4x32b_rpart2_1 = _mm_srai_epi32(i4_res_4x32b_rpart2_1, 10); + + _mm_storeu_si128( + (__m128i *) pu1_out, + _mm_packus_epi16(_mm_packus_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_1), + _mm_packus_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_1))); + + pi2_tmp += i4_filt_stride; + pu1_out += i4_out_stride; + + } /* End of loop over y */ + } +} + +void isvc_vert_interpol_chroma_dyadic_sse42(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf, + WORD32 i4_phase_0, WORD32 i4_phase_1) +{ + WORD8 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3; + WORD32 i4_filt_stride, i4_src_stride; + UWORD8 *pu1_inp; + WORD16 *pi2_tmp; + __m128i i4_samp_16x8b_0, i4_samp_16x8b_1, i4_samp_16x8b_2, i4_samp_16x8b_3, i4_samp_16x8b_4, + i4_samp_16x8b_5; + __m128i i4_res_8x16b_r0, i4_res_8x16b_r1, i4_res_8x16b_r2, i4_res_8x16b_r3, i4_res_8x16b_r4, + i4_res_8x16b_r5, i4_res_8x16b_r6, i4_res_8x16b_r7; + __m128i i4_res_8x16b_r7_temp; + __m128i i4_c0_c1_16x8b, i4_c2_c3_16x8b; + + i4_coeff_0 = (WORD8) (16 - i4_phase_0); + i4_coeff_1 = (WORD8) (i4_phase_0); + i4_coeff_2 = (WORD8) (16 - 
i4_phase_1); + i4_coeff_3 = (WORD8) (i4_phase_1); + + i4_c0_c1_16x8b = + _mm_set_epi8(i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, + i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, + i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0); + i4_c2_c3_16x8b = + _mm_set_epi8(i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, + i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, + i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2); + + /* Initializing pointers */ + pu1_inp = pu1_inp_buf; + pi2_tmp = pi2_tmp_filt_buf; + i4_filt_stride = 6; + i4_src_stride = DYADIC_REF_W_C; + + i4_samp_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_inp)); + i4_samp_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_inp + i4_src_stride)); + i4_samp_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 1))); + i4_samp_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride)); + i4_samp_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 2))); + i4_samp_16x8b_5 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 2) + i4_src_stride)); + + i4_samp_16x8b_0 = _mm_unpacklo_epi8(i4_samp_16x8b_0, i4_samp_16x8b_1); + i4_res_8x16b_r0 = _mm_maddubs_epi16(i4_samp_16x8b_0, i4_c0_c1_16x8b); + _mm_storeu_si128((__m128i *) (pi2_tmp), i4_res_8x16b_r0); + + i4_samp_16x8b_1 = _mm_unpacklo_epi8(i4_samp_16x8b_1, i4_samp_16x8b_2); + i4_res_8x16b_r1 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c2_c3_16x8b); + _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r1); + + i4_res_8x16b_r2 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c0_c1_16x8b); + _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1)), i4_res_8x16b_r2); + + i4_samp_16x8b_2 = _mm_unpacklo_epi8(i4_samp_16x8b_2, i4_samp_16x8b_3); + i4_res_8x16b_r3 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c2_c3_16x8b); + _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1) + i4_filt_stride), + i4_res_8x16b_r3); + 
+ i4_res_8x16b_r4 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c0_c1_16x8b); + _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2)), i4_res_8x16b_r4); + + i4_samp_16x8b_3 = _mm_unpacklo_epi8(i4_samp_16x8b_3, i4_samp_16x8b_4); + i4_res_8x16b_r5 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c2_c3_16x8b); + _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + i4_filt_stride), + i4_res_8x16b_r5); + + i4_res_8x16b_r6 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c0_c1_16x8b); + _mm_storel_epi64((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1)), + i4_res_8x16b_r6); + + i4_res_8x16b_r6 = _mm_shuffle_epi32(i4_res_8x16b_r6, 78); + + i4_samp_16x8b_4 = _mm_unpacklo_epi8(i4_samp_16x8b_4, i4_samp_16x8b_5); + + i4_res_8x16b_r7 = _mm_maddubs_epi16(i4_samp_16x8b_4, i4_c2_c3_16x8b); + + i4_res_8x16b_r7 = _mm_shuffle_epi32(i4_res_8x16b_r7, 147); + + i4_res_8x16b_r7_temp = _mm_blend_epi16(i4_res_8x16b_r6, i4_res_8x16b_r7, 252); + + _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1) + 4), + i4_res_8x16b_r7_temp); +} + +void isvc_horz_interpol_chroma_dyadic_sse42(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf, + WORD32 i4_out_stride, WORD32 i4_phase_0, + WORD32 i4_phase_1) +{ + WORD32 i4_dst_stride, i4_dst_stride2, i4_dst_stride4; + UWORD8 *pu1_out; + WORD16 *pi2_tmp; + + __m128i i4_samp_8x16b_r1_0, i4_samp_8x16b_r1_1, i4_samp_8x16b_r1_2; + __m128i i4_samp_8x16b_r2_0, i4_samp_8x16b_r2_1, i4_samp_8x16b_r2_2; + __m128i i4_samp_8x16b_r3_0, i4_samp_8x16b_r3_1, i4_samp_8x16b_r3_2; + __m128i i4_samp_8x16b_r4_0, i4_samp_8x16b_r4_1, i4_samp_8x16b_r4_2; + __m128i i4_samp_8x16b_r5_0, i4_samp_8x16b_r5_1, i4_samp_8x16b_r5_2; + __m128i i4_samp_8x16b_r6_0, i4_samp_8x16b_r6_1, i4_samp_8x16b_r6_2; + __m128i i4_samp_8x16b_r7_0, i4_samp_8x16b_r7_1, i4_samp_8x16b_r7_2; + __m128i i4_samp_8x16b_r8_0, i4_samp_8x16b_r8_1, i4_samp_8x16b_r8_2; + + __m128i i4_res_4x32b_r1_0, i4_res_4x32b_r1_1; + __m128i i4_res_4x32b_r2_0, i4_res_4x32b_r2_1; + __m128i 
i4_res_4x32b_r3_0, i4_res_4x32b_r3_1; + __m128i i4_res_4x32b_r4_0, i4_res_4x32b_r4_1; + __m128i i4_res_4x32b_r5_0, i4_res_4x32b_r5_1; + __m128i i4_res_4x32b_r6_0, i4_res_4x32b_r6_1; + __m128i i4_res_4x32b_r7_0, i4_res_4x32b_r7_1; + __m128i i4_res_4x32b_r8_0, i4_res_4x32b_r8_1; + + __m128i i4_res_final_8x16b_r1, i4_res_final_8x16b_r2, i4_res_final_8x16b_r3, + i4_res_final_8x16b_r4, i4_res_final_8x16b_r5, i4_res_final_8x16b_r6, i4_res_final_8x16b_r7, + i4_res_final_8x16b_r8; + + __m128i out_16x8b_r1, out_16x8b_r2, out_16x8b_r3, out_16x8b_r4, out_16x8b_r5, out_16x8b_r6, + out_16x8b_r7, out_16x8b_r8; + + __m128i i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1; + __m128i i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1; + __m128i i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1; + __m128i i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1; + __m128i chroma_mask, chroma_mask2; + + WORD32 i4_coeff_0 = 16 - i4_phase_0; + WORD32 i4_coeff_1 = i4_phase_0; + WORD32 i4_coeff_2 = 16 - i4_phase_1; + WORD32 i4_coeff_3 = i4_phase_1; + __m128i coeff_c0_c1_8x16b = _mm_set_epi16(i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, + i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0); + __m128i coeff_c2_c3_8x16b = _mm_set_epi16(i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, + i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2); + __m128i res_128 = _mm_set1_epi32(128); + UWORD32 u4_norm_factor = 8; + + /* Initializing pointers */ + pu1_out = pu1_out_buf; + pi2_tmp = pi2_tmp_filt_buf; + i4_dst_stride = i4_out_stride; + + i4_dst_stride2 = i4_dst_stride << 1; + i4_dst_stride4 = i4_dst_stride << 2; + + /* Horizontal interpolation */ + i4_samp_8x16b_r1_0 = _mm_loadu_si128((__m128i *) pi2_tmp); + i4_samp_8x16b_r2_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 6)); + i4_samp_8x16b_r3_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 12)); + i4_samp_8x16b_r4_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 18)); + i4_samp_8x16b_r5_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 24)); + i4_samp_8x16b_r6_0 = 
_mm_loadu_si128((__m128i *) (pi2_tmp + 30)); + i4_samp_8x16b_r7_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 36)); + i4_samp_8x16b_r8_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 42)); + + i4_samp_8x16b_r1_1 = _mm_srli_si128(i4_samp_8x16b_r1_0, 2); + i4_samp_8x16b_r1_2 = _mm_srli_si128(i4_samp_8x16b_r1_0, 4); + + i4_samp_8x16b_r2_1 = _mm_srli_si128(i4_samp_8x16b_r2_0, 2); + i4_samp_8x16b_r2_2 = _mm_srli_si128(i4_samp_8x16b_r2_0, 4); + + i4_samp_8x16b_r3_1 = _mm_srli_si128(i4_samp_8x16b_r3_0, 2); + i4_samp_8x16b_r3_2 = _mm_srli_si128(i4_samp_8x16b_r3_0, 4); + + i4_samp_8x16b_r4_1 = _mm_srli_si128(i4_samp_8x16b_r4_0, 2); + i4_samp_8x16b_r4_2 = _mm_srli_si128(i4_samp_8x16b_r4_0, 4); + + i4_samp_8x16b_r5_1 = _mm_srli_si128(i4_samp_8x16b_r5_0, 2); + i4_samp_8x16b_r5_2 = _mm_srli_si128(i4_samp_8x16b_r5_0, 4); + + i4_samp_8x16b_r6_1 = _mm_srli_si128(i4_samp_8x16b_r6_0, 2); + i4_samp_8x16b_r6_2 = _mm_srli_si128(i4_samp_8x16b_r6_0, 4); + + i4_samp_8x16b_r7_1 = _mm_srli_si128(i4_samp_8x16b_r7_0, 2); + i4_samp_8x16b_r7_2 = _mm_srli_si128(i4_samp_8x16b_r7_0, 4); + + i4_samp_8x16b_r8_1 = _mm_srli_si128(i4_samp_8x16b_r8_0, 2); + i4_samp_8x16b_r8_2 = _mm_srli_si128(i4_samp_8x16b_r8_0, 4); + + i4_samp_8x16b_r1_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r1_0, i4_samp_8x16b_r1_1); + i4_samp_8x16b_r2_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r2_0, i4_samp_8x16b_r2_1); + i4_samp_8x16b_r3_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r3_0, i4_samp_8x16b_r3_1); + i4_samp_8x16b_r4_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r4_0, i4_samp_8x16b_r4_1); + i4_samp_8x16b_r5_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r5_0, i4_samp_8x16b_r5_1); + i4_samp_8x16b_r6_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r6_0, i4_samp_8x16b_r6_1); + i4_samp_8x16b_r7_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r7_0, i4_samp_8x16b_r7_1); + i4_samp_8x16b_r8_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r8_0, i4_samp_8x16b_r8_1); + + i4_samp_8x16b_r1_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r1_1, i4_samp_8x16b_r1_2); + i4_samp_8x16b_r2_1 = 
_mm_unpacklo_epi16(i4_samp_8x16b_r2_1, i4_samp_8x16b_r2_2); + i4_samp_8x16b_r3_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r3_1, i4_samp_8x16b_r3_2); + i4_samp_8x16b_r4_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r4_1, i4_samp_8x16b_r4_2); + i4_samp_8x16b_r5_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r5_1, i4_samp_8x16b_r5_2); + i4_samp_8x16b_r6_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r6_1, i4_samp_8x16b_r6_2); + i4_samp_8x16b_r7_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r7_1, i4_samp_8x16b_r7_2); + i4_samp_8x16b_r8_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r8_1, i4_samp_8x16b_r8_2); + + // a0c0 + a1c1 a1c0 + a2c1 a2c0 + a3c1 a3c0 + a4c1 + i4_res_4x32b_r1_0 = _mm_madd_epi16(i4_samp_8x16b_r1_0, coeff_c0_c1_8x16b); + // b0c0+b1c1 b1c0+b2c1 b2c0+b3c1 b3c0+b4c1 + i4_res_4x32b_r2_0 = _mm_madd_epi16(i4_samp_8x16b_r2_0, coeff_c0_c1_8x16b); + i4_res_4x32b_r3_0 = _mm_madd_epi16(i4_samp_8x16b_r3_0, coeff_c0_c1_8x16b); + i4_res_4x32b_r4_0 = _mm_madd_epi16(i4_samp_8x16b_r4_0, coeff_c0_c1_8x16b); + i4_res_4x32b_r5_0 = _mm_madd_epi16(i4_samp_8x16b_r5_0, coeff_c0_c1_8x16b); + i4_res_4x32b_r6_0 = _mm_madd_epi16(i4_samp_8x16b_r6_0, coeff_c0_c1_8x16b); + i4_res_4x32b_r7_0 = _mm_madd_epi16(i4_samp_8x16b_r7_0, coeff_c0_c1_8x16b); + i4_res_4x32b_r8_0 = _mm_madd_epi16(i4_samp_8x16b_r8_0, coeff_c0_c1_8x16b); + + // a1c2+a2c3 a2c2+a3c3 a3c2+a4c3 a4c2+a5c3 + i4_res_4x32b_r1_1 = _mm_madd_epi16(i4_samp_8x16b_r1_1, coeff_c2_c3_8x16b); + // b1c2+b2c3 b2c2+b3c3 b3c2+b4c3 b4c2+b5c3 + i4_res_4x32b_r2_1 = _mm_madd_epi16(i4_samp_8x16b_r2_1, coeff_c2_c3_8x16b); + i4_res_4x32b_r3_1 = _mm_madd_epi16(i4_samp_8x16b_r3_1, coeff_c2_c3_8x16b); + i4_res_4x32b_r4_1 = _mm_madd_epi16(i4_samp_8x16b_r4_1, coeff_c2_c3_8x16b); + i4_res_4x32b_r5_1 = _mm_madd_epi16(i4_samp_8x16b_r5_1, coeff_c2_c3_8x16b); + i4_res_4x32b_r6_1 = _mm_madd_epi16(i4_samp_8x16b_r6_1, coeff_c2_c3_8x16b); + i4_res_4x32b_r7_1 = _mm_madd_epi16(i4_samp_8x16b_r7_1, coeff_c2_c3_8x16b); + i4_res_4x32b_r8_1 = _mm_madd_epi16(i4_samp_8x16b_r8_1, coeff_c2_c3_8x16b); + + 
i4_res_4x32b_r1_0 = _mm_add_epi32(i4_res_4x32b_r1_0, res_128); + i4_res_4x32b_r2_0 = _mm_add_epi32(i4_res_4x32b_r2_0, res_128); + i4_res_4x32b_r3_0 = _mm_add_epi32(i4_res_4x32b_r3_0, res_128); + i4_res_4x32b_r4_0 = _mm_add_epi32(i4_res_4x32b_r4_0, res_128); + i4_res_4x32b_r5_0 = _mm_add_epi32(i4_res_4x32b_r5_0, res_128); + i4_res_4x32b_r6_0 = _mm_add_epi32(i4_res_4x32b_r6_0, res_128); + i4_res_4x32b_r7_0 = _mm_add_epi32(i4_res_4x32b_r7_0, res_128); + i4_res_4x32b_r8_0 = _mm_add_epi32(i4_res_4x32b_r8_0, res_128); + + i4_res_4x32b_r1_1 = _mm_add_epi32(i4_res_4x32b_r1_1, res_128); + i4_res_4x32b_r2_1 = _mm_add_epi32(i4_res_4x32b_r2_1, res_128); + i4_res_4x32b_r3_1 = _mm_add_epi32(i4_res_4x32b_r3_1, res_128); + i4_res_4x32b_r4_1 = _mm_add_epi32(i4_res_4x32b_r4_1, res_128); + i4_res_4x32b_r5_1 = _mm_add_epi32(i4_res_4x32b_r5_1, res_128); + i4_res_4x32b_r6_1 = _mm_add_epi32(i4_res_4x32b_r6_1, res_128); + i4_res_4x32b_r7_1 = _mm_add_epi32(i4_res_4x32b_r7_1, res_128); + i4_res_4x32b_r8_1 = _mm_add_epi32(i4_res_4x32b_r8_1, res_128); + + i4_res_4x32b_r1_0 = _mm_srai_epi32(i4_res_4x32b_r1_0, u4_norm_factor); + i4_res_4x32b_r2_0 = _mm_srai_epi32(i4_res_4x32b_r2_0, u4_norm_factor); + i4_res_4x32b_r3_0 = _mm_srai_epi32(i4_res_4x32b_r3_0, u4_norm_factor); + i4_res_4x32b_r4_0 = _mm_srai_epi32(i4_res_4x32b_r4_0, u4_norm_factor); + i4_res_4x32b_r5_0 = _mm_srai_epi32(i4_res_4x32b_r5_0, u4_norm_factor); + i4_res_4x32b_r6_0 = _mm_srai_epi32(i4_res_4x32b_r6_0, u4_norm_factor); + i4_res_4x32b_r7_0 = _mm_srai_epi32(i4_res_4x32b_r7_0, u4_norm_factor); + i4_res_4x32b_r8_0 = _mm_srai_epi32(i4_res_4x32b_r8_0, u4_norm_factor); + + i4_res_4x32b_r1_1 = _mm_srai_epi32(i4_res_4x32b_r1_1, u4_norm_factor); + i4_res_4x32b_r2_1 = _mm_srai_epi32(i4_res_4x32b_r2_1, u4_norm_factor); + i4_res_4x32b_r3_1 = _mm_srai_epi32(i4_res_4x32b_r3_1, u4_norm_factor); + i4_res_4x32b_r4_1 = _mm_srai_epi32(i4_res_4x32b_r4_1, u4_norm_factor); + i4_res_4x32b_r5_1 = _mm_srai_epi32(i4_res_4x32b_r5_1, u4_norm_factor); + 
i4_res_4x32b_r6_1 = _mm_srai_epi32(i4_res_4x32b_r6_1, u4_norm_factor); + i4_res_4x32b_r7_1 = _mm_srai_epi32(i4_res_4x32b_r7_1, u4_norm_factor); + i4_res_4x32b_r8_1 = _mm_srai_epi32(i4_res_4x32b_r8_1, u4_norm_factor); + + i4_res_final_8x16b_r12_0 = _mm_packs_epi32(i4_res_4x32b_r1_0, i4_res_4x32b_r2_0); + i4_res_final_8x16b_r34_0 = _mm_packs_epi32(i4_res_4x32b_r3_0, i4_res_4x32b_r4_0); + i4_res_final_8x16b_r56_0 = _mm_packs_epi32(i4_res_4x32b_r5_0, i4_res_4x32b_r6_0); + i4_res_final_8x16b_r67_0 = _mm_packs_epi32(i4_res_4x32b_r7_0, i4_res_4x32b_r8_0); + + i4_res_final_8x16b_r12_1 = _mm_packs_epi32(i4_res_4x32b_r1_1, i4_res_4x32b_r2_1); + i4_res_final_8x16b_r34_1 = _mm_packs_epi32(i4_res_4x32b_r3_1, i4_res_4x32b_r4_1); + i4_res_final_8x16b_r56_1 = _mm_packs_epi32(i4_res_4x32b_r5_1, i4_res_4x32b_r6_1); + i4_res_final_8x16b_r67_1 = _mm_packs_epi32(i4_res_4x32b_r7_1, i4_res_4x32b_r8_1); + + i4_res_final_8x16b_r1 = _mm_unpacklo_epi16(i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1); + i4_res_final_8x16b_r2 = _mm_unpackhi_epi16(i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1); + i4_res_final_8x16b_r3 = _mm_unpacklo_epi16(i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1); + i4_res_final_8x16b_r4 = _mm_unpackhi_epi16(i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1); + i4_res_final_8x16b_r5 = _mm_unpacklo_epi16(i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1); + i4_res_final_8x16b_r6 = _mm_unpackhi_epi16(i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1); + i4_res_final_8x16b_r7 = _mm_unpacklo_epi16(i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1); + i4_res_final_8x16b_r8 = _mm_unpackhi_epi16(i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1); + + chroma_mask = _mm_set1_epi16(0xFF00); + chroma_mask2 = _mm_set1_epi16(0x00FF); + out_16x8b_r1 = _mm_loadu_si128((__m128i *) (&pu1_out[0])); + out_16x8b_r2 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride])); + out_16x8b_r3 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride2])); + out_16x8b_r4 = 
_mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride2 + i4_dst_stride])); + out_16x8b_r5 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4])); + out_16x8b_r6 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride])); + out_16x8b_r7 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride2])); + out_16x8b_r8 = + _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride2 + i4_dst_stride])); + + out_16x8b_r1 = _mm_and_si128(out_16x8b_r1, chroma_mask); + out_16x8b_r2 = _mm_and_si128(out_16x8b_r2, chroma_mask); + out_16x8b_r3 = _mm_and_si128(out_16x8b_r3, chroma_mask); + out_16x8b_r4 = _mm_and_si128(out_16x8b_r4, chroma_mask); + out_16x8b_r5 = _mm_and_si128(out_16x8b_r5, chroma_mask); + out_16x8b_r6 = _mm_and_si128(out_16x8b_r6, chroma_mask); + out_16x8b_r7 = _mm_and_si128(out_16x8b_r7, chroma_mask); + out_16x8b_r8 = _mm_and_si128(out_16x8b_r8, chroma_mask); + + i4_res_final_8x16b_r1 = _mm_and_si128(i4_res_final_8x16b_r1, chroma_mask2); + i4_res_final_8x16b_r2 = _mm_and_si128(i4_res_final_8x16b_r2, chroma_mask2); + i4_res_final_8x16b_r3 = _mm_and_si128(i4_res_final_8x16b_r3, chroma_mask2); + i4_res_final_8x16b_r4 = _mm_and_si128(i4_res_final_8x16b_r4, chroma_mask2); + i4_res_final_8x16b_r5 = _mm_and_si128(i4_res_final_8x16b_r5, chroma_mask2); + i4_res_final_8x16b_r6 = _mm_and_si128(i4_res_final_8x16b_r6, chroma_mask2); + i4_res_final_8x16b_r7 = _mm_and_si128(i4_res_final_8x16b_r7, chroma_mask2); + i4_res_final_8x16b_r8 = _mm_and_si128(i4_res_final_8x16b_r8, chroma_mask2); + + out_16x8b_r1 = _mm_add_epi8(i4_res_final_8x16b_r1, out_16x8b_r1); + out_16x8b_r2 = _mm_add_epi8(i4_res_final_8x16b_r2, out_16x8b_r2); + out_16x8b_r3 = _mm_add_epi8(i4_res_final_8x16b_r3, out_16x8b_r3); + out_16x8b_r4 = _mm_add_epi8(i4_res_final_8x16b_r4, out_16x8b_r4); + out_16x8b_r5 = _mm_add_epi8(i4_res_final_8x16b_r5, out_16x8b_r5); + out_16x8b_r6 = _mm_add_epi8(i4_res_final_8x16b_r6, out_16x8b_r6); + out_16x8b_r7 = _mm_add_epi8(i4_res_final_8x16b_r7, 
out_16x8b_r7); + out_16x8b_r8 = _mm_add_epi8(i4_res_final_8x16b_r8, out_16x8b_r8); + + _mm_storeu_si128((__m128i *) pu1_out, out_16x8b_r1); + _mm_storeu_si128((__m128i *) (pu1_out + i4_dst_stride), out_16x8b_r2); + _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 2)), out_16x8b_r3); + _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 3)), out_16x8b_r4); + _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 4)), out_16x8b_r5); + _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 5)), out_16x8b_r6); + _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 6)), out_16x8b_r7); + _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 7)), out_16x8b_r8); +} diff --git a/common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c b/common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c new file mode 100644 index 0000000..e4e6c27 --- /dev/null +++ b/common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c @@ -0,0 +1,548 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+ */
+/**
+ *******************************************************************************
+ * @file
+ *  isvc_iquant_itrans_recon_dc_ssse3.c
+ *
+ * @brief
+ *  Contains function definitions for inverse quantization, inverse
+ * transform and reconstruction
+ *
+ * @author
+ *  Mohit [100664]
+ *
+ * @par List of Functions:
+ *  - isvc_iquant_itrans_recon_4x4_dc_ssse3()
+ *  - isvc_iquant_itrans_recon_8x8_dc_ssse3()
+ *  - isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+#include <immintrin.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_trans_macros.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function reconstructs a 4x4 sub block from quantized residue and
+ * prediction buffer for dc input pattern only, i.e. only the (0,0) element of
+ * the input 4x4 block is non-zero. For the complete function, refer to
+ * isvc_iquant_itrans_recon_ssse3.c
+ *
+ * @par Description:
+ *  The quantized residue is first inverse quantized, then inverse transformed.
+ *  This inverse transformed content is added to the prediction buffer to recon-
+ *  struct the end output
+ *
+ * @param[in] ps_src
+ *  Container of the quantized 4x4 block; only coefficient 0 (DC) is read
+ *
+ * @param[in] ps_pred
+ *  Container of the prediction block; 8 bytes are loaded from each of 4 rows
+ *
+ * @param[in] ps_res_pred
+ *  Residual prediction container; its fields are fetched but not used here
+ *
+ * @param[in] ps_res
+ *  Residual container; its fields are fetched but not used here
+ *
+ * @param[out] ps_rec
+ *  Container of the reconstructed block; 4 bytes are written to each of 4 rows
+ *
+ * @param[in] ps_iq_it_res_rec_constants
+ *  Supplies pu2_iscal_mat and pu2_weigh_mat (entry 0 of each is used) and
+ *  u4_qp_div_6
+ *
+ * @param[in] pi2_tmp
+ *  Unused
+ *
+ * @param[in] pi2_dc_src
+ *  Its first entry replaces the dequantized DC when i4_iq_start_idx != 0
+ *
+ * @param[in] i4_iq_start_idx
+ *  Non-zero selects pi2_dc_src[0] as the DC value
+ *
+ * @param[in] u1_res_accumulate
+ *  Unused
+ *
+ * @returns none
+ *
+ * @remarks Residue accumulation is not implemented; ASSERT(0) is evaluated on
+ * every call before the reconstruction is computed
+ *
+ *******************************************************************************
+ */
+void isvc_iquant_itrans_recon_4x4_dc_ssse3(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                           buffer_container_t *ps_res_pred,
+                                           buffer_container_t *ps_res, buffer_container_t *ps_rec,
+                                           iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+                                           WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+                                           WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    /* Buffers and strides taken out of the containers */
+    WORD16 *pi2_src = ps_src->pv_data;
+    WORD16 *pi2_res = ps_res->pv_data;
+    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
+    UWORD8 *pu1_pred = ps_pred->pv_data;
+    UWORD8 *pu1_out = ps_rec->pv_data;
+    WORD32 i4_src_stride = ps_src->i4_data_stride;
+    WORD32 i4_res_stride = ps_res->i4_data_stride;
+    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+
+    /* Dequantization constants */
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+
+    /* Scalar DC path */
+    WORD32 i4_dc = pi2_src[0];
+    WORD16 i2_rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+    WORD16 i2_dc_res;
+    WORD32 i4_row;
+
+    const __m128i zero_8x16b = _mm_setzero_si128();
+    __m128i ai_pred_8x16b[4];
+    __m128i dc_8x16b, rows01_8x16b, rows23_8x16b, recon_16x8b;
+
+    UNUSED(pi2_tmp);
+    UNUSED(u1_res_accumulate);
+    UNUSED(i4_src_stride);
+    UNUSED(i4_res_stride);
+    UNUSED(i4_res_pred_stride);
+    UNUSED(pi2_res);
+    UNUSED(pi2_res_pred);
+    UNUSED(i4_iq_start_idx);
+
+    /* Implement residue accumulation */
+    ASSERT(0);
+
+    INV_QUANT(i4_dc, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, i2_rnd_fact, 4);
+
+    /* Restoring dc value for intra case */
+    if(i4_iq_start_idx != 0)
+    {
+        i4_dc = pi2_dc_src[0];
+    }
+
+    i2_dc_res = ((i4_dc + 32) >> 6);
+    dc_8x16b = _mm_set1_epi16(i2_dc_res);
+
+    /* Load each prediction row and widen its bytes to 16 bit lanes */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        ai_pred_8x16b[i4_row] = _mm_unpacklo_epi8(
+            _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_row * i4_pred_stride])), zero_8x16b);
+    }
+
+    /* p00 p01 p02 p03 p10 p11 p12 p13 */
+    rows01_8x16b = _mm_unpacklo_epi64(ai_pred_8x16b[0], ai_pred_8x16b[1]);
+    /* p20 p21 p22 p23 p30 p31 p32 p33 */
+    rows23_8x16b = _mm_unpacklo_epi64(ai_pred_8x16b[2], ai_pred_8x16b[3]);
+
+    /* Every pixel receives the same DC residual */
+    rows01_8x16b = _mm_add_epi16(dc_8x16b, rows01_8x16b);
+    rows23_8x16b = _mm_add_epi16(dc_8x16b, rows23_8x16b);
+
+    /* Clipping the results to 8 bits: lanes that are not positive are zeroed, */
+    /* the unsigned pack saturates the rest */
+    rows01_8x16b = _mm_and_si128(rows01_8x16b, _mm_cmpgt_epi16(rows01_8x16b, zero_8x16b));
+    rows23_8x16b = _mm_and_si128(rows23_8x16b, _mm_cmpgt_epi16(rows23_8x16b, zero_8x16b));
+    recon_16x8b = _mm_packus_epi16(rows01_8x16b, rows23_8x16b);
+
+    /* Write one row (4 bytes) per iteration, then bring the next row down */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        *((UWORD32 *) (pu1_out + i4_row * i4_out_stride)) = _mm_cvtsi128_si32(recon_16x8b);
+        recon_16x8b = _mm_srli_si128(recon_16x8b, 4);
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  This function performs inverse quant and Inverse transform type Ci4 for 8x8
+ *block for dc input pattern only, i.e. only the (0,0) element of the input 8x8
+ *block is non-zero.
For complete function, refer
+ *isvc_iquant_itrans_recon_ssse3.c
+ *
+ * @par Description:
+ *  Dequantizes the DC coefficient, scales it with (dc + 32) >> 6 and adds the
+ *  result to every pixel of the 8x8 prediction to get the reconstructed block
+ *
+ * @param[in] ps_src
+ *  Container of the 8x8 coefficients; only coefficient 0 (DC) is read
+ *
+ * @param[in] ps_pred
+ *  Container of the 8x8 prediction block
+ *
+ * @param[in] ps_res_pred
+ *  Residual prediction container; its fields are fetched but not used here
+ *
+ * @param[in] ps_res
+ *  Residual container; its fields are fetched but not used here
+ *
+ * @param[out] ps_rec
+ *  Container of the 8x8 output block
+ *
+ * @param[in] ps_iq_it_res_rec_constants
+ *  Supplies pu2_iscal_mat and pu2_weigh_mat (entry 0 of each is used) and
+ *  u4_qp_div_6
+ *
+ * @param[in] pi2_tmp
+ *  Unused
+ *
+ * @param[in] pi2_dc_src
+ *  Unused
+ *
+ * @param[in] i4_iq_start_idx
+ *  Unused
+ *
+ * @param[in] u1_res_accumulate
+ *  Unused
+ *
+ * @returns Void
+ *
+ * @remarks
+ *  Residue accumulation is not implemented; ASSERT(0) is evaluated on every
+ *  call before the reconstruction is computed
+ *
+ *******************************************************************************
+ */
+
+void isvc_iquant_itrans_recon_8x8_dc_ssse3(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                           buffer_container_t *ps_res_pred,
+                                           buffer_container_t *ps_res, buffer_container_t *ps_rec,
+                                           iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+                                           WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+                                           WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    /* Buffers and strides taken out of the containers */
+    WORD16 *pi2_src = ps_src->pv_data;
+    WORD16 *pi2_res = ps_res->pv_data;
+    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
+    UWORD8 *pu1_pred = ps_pred->pv_data;
+    UWORD8 *pu1_out = ps_rec->pv_data;
+    WORD32 i4_src_stride = ps_src->i4_data_stride;
+    WORD32 i4_res_stride = ps_res->i4_data_stride;
+    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+
+    /* Dequantization constants */
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+
+    /* Scalar DC path */
+    WORD32 i4_dc = pi2_src[0];
+    WORD16 i2_rnd_fact = (u4_qp_div_6 < 6) ? 1 << (5 - u4_qp_div_6) : 0;
+    WORD16 i2_dc_res;
+    WORD32 i4_row;
+
+    const __m128i zero_8x16b = _mm_setzero_si128();
+    __m128i ai_row_8x16b[8];
+    __m128i dc_8x16b;
+
+    UNUSED(pi2_tmp);
+    UNUSED(pi2_dc_src);
+    UNUSED(u1_res_accumulate);
+    UNUSED(i4_src_stride);
+    UNUSED(i4_res_stride);
+    UNUSED(i4_res_pred_stride);
+    UNUSED(pi2_res);
+    UNUSED(pi2_res_pred);
+    UNUSED(i4_iq_start_idx);
+
+    /* Implement residue accumulation */
+    ASSERT(0);
+
+    INV_QUANT(i4_dc, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, i2_rnd_fact, 6);
+    i2_dc_res = ((i4_dc + 32) >> 6);
+
+    dc_8x16b = _mm_set1_epi16(i2_dc_res);
+
+    /* Load all eight prediction rows: p0 .. p7 widened to 16 bit lanes */
+    for(i4_row = 0; i4_row < 8; i4_row++)
+    {
+        ai_row_8x16b[i4_row] = _mm_unpacklo_epi8(
+            _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_row * i4_pred_stride])), zero_8x16b);
+    }
+
+    for(i4_row = 0; i4_row < 8; i4_row++)
+    {
+        __m128i sum_8x16b = _mm_add_epi16(dc_8x16b, ai_row_8x16b[i4_row]);
+
+        /* Clipping the results to 8 bits: lanes that are not positive are */
+        /* zeroed, the unsigned pack saturates the rest */
+        sum_8x16b = _mm_and_si128(sum_8x16b, _mm_cmpgt_epi16(sum_8x16b, zero_8x16b));
+        ai_row_8x16b[i4_row] = _mm_packus_epi16(sum_8x16b, zero_8x16b);
+    }
+
+    /* Stores are issued only after every prediction row has been read */
+    for(i4_row = 0; i4_row < 8; i4_row++)
+    {
+        _mm_storel_epi64((__m128i *) (&pu1_out[i4_row * i4_out_stride]), ai_row_8x16b[i4_row]);
+    }
+}
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function reconstructs a 4x4 sub block from quantized chroma
+ *residue and prediction buffer
+ *
+ * @par Description:
+ *  The quantized residue is first inverse quantized, then inverse transformed.
+ *  This inverse transformed content is added to the prediction buffer to recon-
+ *  struct the end output
+ *
+ * @param[in] ps_src
+ *  Container of the quantized 4x4 block; fetched but not used here
+ *
+ * @param[in] ps_pred
+ *  Container of the prediction block; the even bytes of the first 8 bytes of
+ *  each of 4 rows are used
+ *
+ * @param[in] ps_res_pred
+ *  Residual prediction container; its fields are fetched but not used here
+ *
+ * @param[in] ps_res
+ *  Residual container; its fields are fetched but not used here
+ *
+ * @param[in,out] ps_rec
+ *  Container of the reconstructed block; in each of 4 rows the even bytes of
+ *  the first 8 bytes are replaced and the odd bytes are written back unchanged
+ *
+ * @param[in] ps_iq_it_res_rec_constants
+ *  Fetched but not used here
+ *
+ * @param[in] pi2_tmp
+ *  Unused
+ *
+ * @param[in] pi2_dc_src
+ *  Its first entry is taken as the DC value, without dequantization
+ *
+ * @param[in] i4_iq_start_idx
+ *  Unused
+ *
+ * @param[in] u1_res_accumulate
+ *  Unused
+ *
+ * @returns none
+ *
+ * @remarks Residue accumulation is not implemented; ASSERT(0) is evaluated on
+ * every call before the reconstruction is computed. The even/odd byte split
+ * presumably reflects an interleaved chroma layout - confirm against callers
+ *
+ *******************************************************************************
+ */
+void isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3(
+    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
+    buffer_container_t *ps_res, buffer_container_t *ps_rec,
+    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    /* Buffers and strides taken out of the containers */
+    WORD16 *pi2_src = ps_src->pv_data;
+    WORD16 *pi2_res = ps_res->pv_data;
+    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
+    UWORD8 *pu1_pred = ps_pred->pv_data;
+    UWORD8 *pu1_out = ps_rec->pv_data;
+    WORD32 i4_src_stride = ps_src->i4_data_stride;
+    WORD32 i4_res_stride = ps_res->i4_data_stride;
+    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+
+    /* DC value won't be dequantized for chroma inverse transform */
+    WORD16 i2_dc = pi2_dc_src[0];
+    WORD16 i2_dc_res = ((i2_dc + 32) >> 6);
+    WORD32 i4_row;
+
+    const __m128i zero_8x16b = _mm_setzero_si128();
+    /* Keeps the even byte of every 16 bit lane */
+    const __m128i pred_mask_8x16b = _mm_set1_epi16(0xFF);
+    /* Keeps the odd byte of every 16 bit lane */
+    const __m128i out_mask_8x16b = _mm_set1_epi16(0xFF00);
+    const __m128i dc_8x16b = _mm_set1_epi16(i2_dc_res);
+    __m128i ai_pred_8x16b[4], ai_out_16x8b[4];
+    __m128i rows01_8x16b, rows23_8x16b, packed_16x8b;
+
+    UNUSED(pi2_src);
+    UNUSED(pu2_iscal_mat);
+    UNUSED(pu2_weigh_mat);
+    UNUSED(u4_qp_div_6);
+    UNUSED(pi2_tmp);
+    UNUSED(u1_res_accumulate);
+    UNUSED(i4_src_stride);
+    UNUSED(i4_res_stride);
+    UNUSED(i4_res_pred_stride);
+    UNUSED(pi2_res);
+    UNUSED(pi2_res_pred);
+    UNUSED(i4_iq_start_idx);
+
+    /* Implement residue accumulation */
+    ASSERT(0);
+
+    /* Load pred buffer; masking leaves the even bytes as 16 bit lanes */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        ai_pred_8x16b[i4_row] = _mm_and_si128(
+            _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_row * i4_pred_stride])), pred_mask_8x16b);
+    }
+
+    /* p00 p01 p02 p03 p10 p11 p12 p13 */
+    rows01_8x16b = _mm_unpacklo_epi64(ai_pred_8x16b[0], ai_pred_8x16b[1]);
+    /* p20 p21 p22 p23 p30 p31 p32 p33 */
+    rows23_8x16b = _mm_unpacklo_epi64(ai_pred_8x16b[2], ai_pred_8x16b[3]);
+
+    rows01_8x16b = _mm_add_epi16(dc_8x16b, rows01_8x16b);
+    rows23_8x16b = _mm_add_epi16(dc_8x16b, rows23_8x16b);
+
+    /* Clipping the results to 8 bits: lanes that are not positive are zeroed, */
+    /* the unsigned pack saturates the rest */
+    rows01_8x16b = _mm_and_si128(rows01_8x16b, _mm_cmpgt_epi16(rows01_8x16b, zero_8x16b));
+    rows23_8x16b = _mm_and_si128(rows23_8x16b, _mm_cmpgt_epi16(rows23_8x16b, zero_8x16b));
+    packed_16x8b = _mm_packus_epi16(rows01_8x16b, rows23_8x16b);
+
+    /* Spread each row's 4 result bytes back onto the even byte positions */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        ai_pred_8x16b[i4_row] = _mm_unpacklo_epi8(packed_16x8b, zero_8x16b);
+        packed_16x8b = _mm_srli_si128(packed_16x8b, 4);
+    }
+
+    /* Keep the odd bytes already present in the output and merge the results */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        __m128i out_16x8b = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_row * i4_out_stride]));
+
+        out_16x8b = _mm_and_si128(out_16x8b, out_mask_8x16b);
+        ai_out_16x8b[i4_row] = _mm_add_epi8(out_16x8b, ai_pred_8x16b[i4_row]);
+    }
+
+    /* Stores are issued only after every output row has been read */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        _mm_storel_epi64((__m128i *) (&pu1_out[i4_row * i4_out_stride]), ai_out_16x8b[i4_row]);
+    }
+}
diff --git a/common/x86/svc/isvc_iquant_itrans_recon_sse42.c b/common/x86/svc/isvc_iquant_itrans_recon_sse42.c
new file mode 100644
index 0000000..829952b
--- /dev/null
+++ b/common/x86/svc/isvc_iquant_itrans_recon_sse42.c
@@ -0,0 +1,2849 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+ *******************************************************************************
+ * @file
+ *  isvc_iquant_itrans_recon_sse42.c
+ *
+ * @brief
+ *  Contains function definitions for inverse quantization, inverse
+ * transform and reconstruction
+ *
+ * @author
+ *  Mohit [100664]
+ *
+ * @par List of Functions:
+ *  - isvc_iquant_itrans_recon_4x4_sse42()
+ *  - isvc_iquant_itrans_recon_res_4x4_sse42()
+ *  - isvc_iquant_itrans_recon_chroma_4x4_sse42()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+#include <immintrin.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_trans_macros.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function reconstructs a 4x4 sub block from quantized residue and
+ * prediction buffer
+ *
+ * @par Description:
+ *  The quantized residue is first inverse quantized, then inverse transformed.
+ *  This inverse transformed content is added to the prediction buffer to recon-
+ *  struct the end output
+ *
+ * @param[in] ps_src
+ *  Container of the quantized 4x4 block; 16 coefficients are read contiguously
+ *  and its stride is asserted to be 4
+ *
+ * @param[in] ps_pred
+ *  Container of the prediction block; 8 bytes are loaded from each of 4 rows
+ *  and the low 4 of each are used
+ *
+ * @param[in] ps_res_pred
+ *  Unused
+ *
+ * @param[in] ps_res
+ *  Unused
+ *
+ * @param[out] ps_rec
+ *  Container of the reconstructed block; 4 bytes are written to each of 4 rows
+ *
+ * @param[in] ps_iq_it_res_rec_constants
+ *  Supplies pu2_iscal_mat and pu2_weigh_mat (16 entries of each are read) and
+ *  u4_qp_div_6
+ *
+ * @param[out] pi2_tmp
+ *  Receives 16 WORD16 values: the data after the first 1-D pass, saturated to
+ *  16 bits
+ *
+ * @param[in] pi2_dc_src
+ *  Its first entry replaces the dequantized (0,0) coefficient when
+ *  i4_iq_start_idx == 1
+ *
+ * @param[in] i4_iq_start_idx
+ *  1 selects pi2_dc_src[0] as the DC coefficient
+ *
+ * @param[in] u1_res_accumulate
+ *  Asserted to be 0
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+
+void isvc_iquant_itrans_recon_4x4_sse42(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                        buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
+                                        buffer_container_t *ps_rec,
+                                        iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+                                        WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx,
+                                        UWORD8 u1_res_accumulate)
+{
+    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
+    WORD16 *pi2_tmp_ptr = pi2_tmp;
+    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
+    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
+    WORD32 i4_src_stride = ps_src->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
+    __m128i src_r0_r1, src_r2_r3;
+    __m128i src_r0, src_r1, src_r2, src_r3;
+    __m128i scalemat_r0_r1, scalemat_r2_r3;
+    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
+    __m128i sign_reg, dequant_r0_r1, dequant_r2_r3;
+    /* all bits reset to zero */
+    __m128i zero_8x16b = _mm_setzero_si128();
+    __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX));
+    __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX));
+    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
+    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0); /* rounding term, used only on the right shift path */
+    __m128i value_32 = _mm_set1_epi32(32);
+
+    ASSERT(4 == i4_src_stride); /* the two 128 bit source loads below read 16 contiguous coefficients */
+    ASSERT(0 == u1_res_accumulate); /* residue accumulation is not handled by this function */
+
+    UNUSED(i4_src_stride);
+    UNUSED(ps_res);
+    UNUSED(ps_res_pred);
+    UNUSED(u1_res_accumulate);
+
+    /*************************************************************/
+    /* Dequantization of coefficients:                           */
+    /* coeff * (iscal * weigh), then a shift by (qp_div_6 - 4),  */
+    /* with rounding when the shift is to the right              */
+    /*************************************************************/
+
+    /* a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row */
+    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src));
+
+    /* a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row */
+    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8));
+
+    /* b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row */
+    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat));
+
+    /* b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row */
+    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8));
+
+    /* q00 q01 q02 q03 q10 q11 q12 q13 -- weight matrix 0th,1st row, all 16 bits */
+    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat));
+
+    /* q20 q21 q22 q23 q30 q31 q32 q33 -- weight matrix 2nd,3rd row, all 16 bits */
+    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8));
+
+    /* b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- low 16 bits
+     * of each product */
+    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1);
+
+    /* b20*q20 b21*q21 b22*q22 b23*q23 b30*q30 b31*q31 b32*q32 b33*q33 -- low 16 bits
+     * of each product */
+    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3);
+
+    /* b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit values interleaved with zeros */
+    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b);
+
+    /* b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit values interleaved with zeros */
+    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b);
+
+    /* b20*q20 0 b21*q21 0 b22*q22 0 b23*q23 0 -- 16 bit values interleaved with zeros */
+    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b);
+
+    /* b30*q30 0 b31*q31 0 b32*q32 0 b33*q33 0 -- 16 bit values interleaved with zeros */
+    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b);
+
+    /* a00 0 a01 0 a02 0 a03 0 -- 16 bit values interleaved with zeros */
+    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b);
+    /* a10 0 a11 0 a12 0 a13 0 -- 16 bit values interleaved with zeros */
+    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b);
+    /* a20 0 a21 0 a22 0 a23 0 -- 16 bit values interleaved with zeros */
+    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b);
+    /* a30 0 a31 0 a32 0 a33 0 -- 16 bit values interleaved with zeros */
+    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b);
+
+    temp4 = _mm_madd_epi16(src_r0, temp4); /* the zero partner in every pair leaves a(ij) * (b*q)(ij) per 32 bit lane */
+    temp5 = _mm_madd_epi16(src_r1, temp5);
+    temp6 = _mm_madd_epi16(src_r2, temp6);
+    temp7 = _mm_madd_epi16(src_r3, temp7);
+
+    if(u4_qp_div_6 >= 4)
+    {
+        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
+        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
+        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
+        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
+    }
+    else
+    {
+        temp4 = _mm_add_epi32(temp4, add_rshift);
+        temp5 = _mm_add_epi32(temp5, add_rshift);
+        temp6 = _mm_add_epi32(temp6, add_rshift);
+        temp7 = _mm_add_epi32(temp7, add_rshift);
+        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
+        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
+        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
+        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
+    }
+
+    if(i4_iq_start_idx == 1) resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_src[0], 0); /* DC comes from pi2_dc_src instead of the value dequantized above */
+    /* Perform Inverse transform */
+    /*-------------------------------------------------------------*/
+    /* IDCT [ Horizontal transformation ] */
+    /*-------------------------------------------------------------*/
+    // Matrix transpose
+    /*
+     * a0 a1 a2 a3
+     * b0 b1 b2 b3
+     * c0 c1 c2 c3
+     * d0 d1 d2 d3
+     */
+
+    /* a0 b0 a1 b1 */
+    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);
+    /* c0 d0 c1 d1 */
+    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);
+    /* a2 b2 a3 b3 */
+    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);
+    /* c2 d2 c3 d3 */
+    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);
+    /* a0 b0 c0 d0 */
+    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);
+    /* a1 b1 c1 d1 */
+    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);
+    /* a2 b2 c2 d2 */
+    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);
+    /* a3 b3 c3 d3 */
+    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);
+    /* Transform starts -- horizontal transform */
+    /*------------------------------------------------------------------*/
+    /* z0 = w0 + w2 */
+    temp0 = _mm_add_epi32(resq_r0, resq_r2);
+    /* z1 = w0 - w2 */
+    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
+    /* z2 = (w1 >> 1) - w3 */
+    temp2 = _mm_srai_epi32(resq_r1, 1);
+    temp2 = _mm_sub_epi32(temp2, resq_r3);
+    /* z3 = w1 + (w3 >> 1) */
+    temp3 = _mm_srai_epi32(resq_r3, 1);
+    temp3 = _mm_add_epi32(temp3, resq_r1);
+    /*----------------------------------------------------------*/
+    /* x0 = z0 + z3 */
+    resq_r0 = _mm_add_epi32(temp0, temp3);
+    /* x1 = z1 + z2 */
+    resq_r1 = _mm_add_epi32(temp1, temp2);
+    /* x2 = z1 - z2 */
+    resq_r2 = _mm_sub_epi32(temp1, temp2);
+    /* x3 = z0 - z3 */
+    resq_r3 = _mm_sub_epi32(temp0, temp3);
+
+    // Matrix transpose
+    /*
+     * a0 b0 c0 d0
+     * a1 b1 c1 d1
+     * a2 b2 c2 d2
+     * a3 b3 c3 d3
+     */
+
+    /* a0 a1 b0 b1 */
+    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);
+    /* a2 a3 b2 b3 */
+    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);
+    /* c0 c1 d0 d1 */
+    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);
+    /* c2 c3 d2 d3 */
+    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);
+    /* a0 a1 a2 a3 */
+    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);
+    /* b0 b1 b2 b3 */
+    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);
+    /* c0 c1 c2 c3 */
+    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);
+    /* d0 d1 d2 d3 */
+    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);
+    /* Transform ends -- horizontal transform */
+
+    temp0 = _mm_packs_epi32(resq_r0, resq_r1); /* 16 bit saturated copy of rows 0,1 after the first pass */
+    temp1 = _mm_packs_epi32(resq_r2, resq_r3); /* 16 bit saturated copy of rows 2,3 after the first pass */
+
+    _mm_storeu_si128((__m128i *) (&pi2_tmp_ptr[0]), temp0); /* the copy goes to pi2_tmp; the second pass below keeps using the 32 bit registers */
+    _mm_storeu_si128((__m128i *) (&pi2_tmp_ptr[2 * 4]), temp1);
+
+    /* Load pred buffer: 8 bytes per row are read, the low 4 of each survive the
+     * 64 bit interleave further down */
+    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0]));
+    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride]));
+    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride]));
+    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride]));
+
+    pred_r0 = _mm_cvtepu8_epi16(pred_r0);
+    pred_r1 = _mm_cvtepu8_epi16(pred_r1);
+    pred_r2 = _mm_cvtepu8_epi16(pred_r2);
+    pred_r3 = _mm_cvtepu8_epi16(pred_r3);
+
+    pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); /* p00 p01 p02 p03 p10 p11 p12 p13 */
+    pred_r1 = _mm_unpacklo_epi64(pred_r2, pred_r3); /* p20 p21 p22 p23 p30 p31 p32 p33 */
+
+    /*--------------------------------------------------------------*/
+    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */
+    /* */
+    /* Add the prediction and store it to the recon buffer */
+    /*--------------------------------------------------------------*/
+    /* z0j = y0j + y2j */
+    temp0 = _mm_add_epi32(resq_r0, resq_r2);
+    /* z1j = y0j - y2j */
+    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
+    /* z2j = (y1j>>1) - y3j */
+    temp2 = _mm_srai_epi32(resq_r1, 1);
+    temp2 = _mm_sub_epi32(temp2, resq_r3);
+    /* z3j = y1j + (y3j>>1) */
+    temp3 = _mm_srai_epi32(resq_r3, 1);
+    temp3 = _mm_add_epi32(temp3, resq_r1);
+
+    /* x0j = z0j + z3j */
+    temp4 = _mm_add_epi32(temp0, temp3);
+    temp4 = _mm_add_epi32(temp4, value_32);
+    temp4 = _mm_srai_epi32(temp4, 6);
+    /* x1j = z1j + z2j */
+    temp5 = _mm_add_epi32(temp1, temp2);
+    temp5 = _mm_add_epi32(temp5, value_32);
+    temp5 = _mm_srai_epi32(temp5, 6);
+    /* x2j = z1j - z2j */
+    temp6 = _mm_sub_epi32(temp1, temp2);
+    temp6 = _mm_add_epi32(temp6, value_32);
+    temp6 = _mm_srai_epi32(temp6, 6);
+    /* x3j = z0j - z3j */
+    temp7 = _mm_sub_epi32(temp0, temp3);
+    temp7 = _mm_add_epi32(temp7, value_32);
+    temp7 = _mm_srai_epi32(temp7, 6);
+
+    /* 32-bit to 16-bit conversion */
+    temp0 = _mm_packs_epi32(temp4, temp5);
+    temp1 = _mm_packs_epi32(temp6, temp7);
+
+    /* Saturate all values < -255 to -255 and retain the rest as it is */
+    temp4 = _mm_max_epi16(temp0, neg_255_8x16b);
+    /* Saturate all values > 255 to 255 and retain the rest as it is */
+    temp4 = _mm_min_epi16(temp4, pos_255_8x16b);
+
+    /* Saturate all values < -255 to -255 and retain the rest as it is */
+    temp5 = _mm_max_epi16(temp1, neg_255_8x16b);
+    /* Saturate all values > 255 to 255 and retain the rest as it is */
+    temp5 = _mm_min_epi16(temp5, pos_255_8x16b);
+
+    temp0 = _mm_add_epi16(temp4, pred_r0);
+    temp1 = _mm_add_epi16(temp5, pred_r1);
+
+    /*------------------------------------------------------------------*/
+    /* Clipping the results to 8 bits: lanes that are not positive are */
+    /* zeroed here, the unsigned pack below saturates the rest */
+    sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b);
+    temp0 = _mm_and_si128(temp0, sign_reg);
+    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b);
+    temp1 = _mm_and_si128(temp1, sign_reg);
+
+    resq_r0 = _mm_packus_epi16(temp0, temp1); /* all 16 recon bytes, row 0 in the low 4 bytes */
+    resq_r1 = _mm_srli_si128(resq_r0, 4); /* row 1 moved to the low 4 bytes */
+    resq_r2 = _mm_srli_si128(resq_r1, 4); /* row 2 moved to the low 4 bytes */
+    resq_r3 = _mm_srli_si128(resq_r2, 4); /* row 3 moved to the low 4 bytes */
+
+    *pu4_out = _mm_cvtsi128_si32(resq_r0); /* each store writes one 4 byte row */
+    pu1_out += i4_out_stride;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r1);
+    pu1_out += i4_out_stride;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r2);
+    pu1_out += i4_out_stride;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r3);
+}
+
+void isvc_iquant_itrans_recon_res_4x4_sse42(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                            buffer_container_t *ps_res_pred,
+                                            buffer_container_t *ps_res, buffer_container_t *ps_rec,
+                                            iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+                                            WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+                                            WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
+    WORD16 *pi2_tmp_ptr = pi2_tmp;
+    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
+    UWORD8 *pu1_pred = (UWORD8 *)
ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + UWORD32 *pu4_out = (UWORD32 *) pu1_out; + __m128i src_r0_r1, src_r2_r3; + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i scalemat_r0_r1, scalemat_r2_r3; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i sign_reg, dequant_r0_r1, dequant_r2_r3; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX)); + __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX)); + __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + __m128i resq_r0, resq_r1, resq_r2, resq_r3; + __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0); + __m128i value_32 = _mm_set1_epi32(32); + + ASSERT(4 == i4_src_stride); + ASSERT(0 == u1_res_accumulate); + + UNUSED(i4_src_stride); + UNUSED(ps_res_pred); + UNUSED(u1_res_accumulate); + + /*************************************************************/ + /* Dequantization of coefficients. 
Will be replaced by SIMD */ + /* operations on platform */ + /*************************************************************/ + + /* a00 a01 a02 a03 a10 a11 a12 a13 -- the source + matrix 0th,1st row */ + src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); + + /* a20 a21 a22 a23 a30 a31 a32 a33 -- the + source matrix 2nd,3rd row */ + src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); + + /* b00 b01 b02 b03 b10 b11 b12 b13 -- the + scaling matrix 0th,1st row */ + scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); + + /* b20 b21 b22 b23 b30 b31 b32 b33 -- the + scaling matrix 2nd,3rd row */ + scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); + + /* q00 q01 q02 q03 q10 q11 + q12 q13 -- all 16 bits */ + dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); + + /* q20 q21 q22 q23 q30 q31 + q32 q33 -- all 16 bits */ + dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); + + /* b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 + b12*q12 b13*q13 -- 16 bit result */ + temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); + + /* b20*q20 b21*q21 b22*q22 b23*q23 b30*q30 b31*q31 + b32*q32 b33*q33 -- 16 bit result */ + temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); + + /* b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long */ + temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); + + /* b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long */ + temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); + + /* b20*q20 0 b21*q21 0 b22*q22 0 b23*q23 0 -- 16 bit long */ + temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); + + /* b30*q30 0 b31*q31 0 b32*q32 0 b33*q33 0 -- 16 bit long */ + temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); + + /* a00 0 a01 0 a02 0 a03 0 -- 16 bit long */ + src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); + /* a10 0 a11 0 a12 0 a13 0 -- 16 bit long */ + src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); + /* a20 0 a21 0 a22 0 a23 0 -- 16 bit long */ + src_r2 = _mm_unpacklo_epi16(src_r2_r3,
zero_8x16b); + /* a30 0 a31 0 a32 0 a33 0 -- 16 bit long */ + src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); + + temp4 = _mm_madd_epi16(src_r0, temp4); + temp5 = _mm_madd_epi16(src_r1, temp5); + temp6 = _mm_madd_epi16(src_r2, temp6); + temp7 = _mm_madd_epi16(src_r3, temp7); + + if(u4_qp_div_6 >= 4) + { + resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4); + resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4); + resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4); + resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4); + } + else + { + temp4 = _mm_add_epi32(temp4, add_rshift); + temp5 = _mm_add_epi32(temp5, add_rshift); + temp6 = _mm_add_epi32(temp6, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6); + resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6); + resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6); + resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6); + } + + if(i4_iq_start_idx == 1) resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_src[0], 0); + /* Perform Inverse transform */ + /*-------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + + /* a0 b0 a1 b1 */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); + /* c0 d0 c1 d1 */ + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); + /* a2 b2 a3 b3 */ + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); + /* c2 d2 c3 d3 */ + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); + /* a0 b0 c0 d0 */ + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); + /* a1 b1 c1 d1 */ + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); + /* a2 b2 c2 d2 */ + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); + /* a3 b3 c3 d3 */ + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); + /* Transform starts -- horizontal transform */ + /*------------------------------------------------------------------*/ + /* z0 = w0 + w2 
*/ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1 = w0 - w2 */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2 = (w1 >> 1) - w3 */ + temp2 = _mm_srai_epi32(resq_r1, 1); + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3 = w1 + (w3 >> 1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); + temp3 = _mm_add_epi32(temp3, resq_r1); + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + resq_r0 = _mm_add_epi32(temp0, temp3); + /* x1 = z1 + z2 */ + resq_r1 = _mm_add_epi32(temp1, temp2); + /* x2 = z1 - z2 */ + resq_r2 = _mm_sub_epi32(temp1, temp2); + /* x3 = z0 - z3 */ + resq_r3 = _mm_sub_epi32(temp0, temp3); + + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + + /* a0 a1 b0 b1 */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); + /* a2 a3 b2 b3 */ + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); + /* c0 c1 d0 d1 */ + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); + /* c2 c3 d2 d3 */ + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); + /* a0 a1 a2 a3 */ + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); + /* b0 b1 b2 b3 */ + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); + /* c0 c1 c2 c3 */ + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); + /* d0 d1 d2 d3 */ + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); + /* Transform ends -- horizontal transform */ + + temp0 = _mm_packs_epi32(resq_r0, resq_r1); + temp1 = _mm_packs_epi32(resq_r2, resq_r3); + + _mm_storeu_si128((__m128i *) (&pi2_tmp_ptr[0]), temp0); + _mm_storeu_si128((__m128i *) (&pi2_tmp_ptr[2 * 4]), temp1); + + /* Load pred buffer */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + pred_r2 = _mm_cvtepu8_epi16(pred_r2); + pred_r3 = _mm_cvtepu8_epi16(pred_r3); + 
+ /*--------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to same buffer */ + /*--------------------------------------------------------------*/ + /* z0j = y0j + y2j */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1j = y0j - y2j */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2j = (y1j>>1) - y3j */ + temp2 = _mm_srai_epi32(resq_r1, 1); + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3j = y1j + (y3j>>1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); + temp3 = _mm_add_epi32(temp3, resq_r1); + + /* x0j = z0j + z3j */ + temp4 = _mm_add_epi32(temp0, temp3); + temp4 = _mm_add_epi32(temp4, value_32); + temp4 = _mm_srai_epi32(temp4, 6); + /* x1j = z1j + z2j */ + temp5 = _mm_add_epi32(temp1, temp2); + temp5 = _mm_add_epi32(temp5, value_32); + temp5 = _mm_srai_epi32(temp5, 6); + /* x2j = z1j - z2j */ + temp6 = _mm_sub_epi32(temp1, temp2); + temp6 = _mm_add_epi32(temp6, value_32); + temp6 = _mm_srai_epi32(temp6, 6); + /* x3j = z0j - z3j */ + temp7 = _mm_sub_epi32(temp0, temp3); + temp7 = _mm_add_epi32(temp7, value_32); + temp7 = _mm_srai_epi32(temp7, 6); + + /* 32-bit to 16-bit conversion */ + temp0 = _mm_packs_epi32(temp4, temp5); + temp1 = _mm_packs_epi32(temp6, temp7); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp0 = _mm_max_epi16(temp0, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp0 = _mm_min_epi16(temp0, pos_255_8x16b); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp1 = _mm_max_epi16(temp1, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp1 = _mm_min_epi16(temp1, pos_255_8x16b); + + _mm_storel_epi64((__m128i *) (&pi2_res[0]), temp0); + _mm_storel_epi64((__m128i *) (&pi2_res[2 * i4_res_stride]), temp1); + + temp4 = _mm_add_epi16(temp0, pred_r0); + temp0 = _mm_srli_si128(temp0, 8); + 
_mm_storel_epi64((__m128i *) (&pi2_res[i4_res_stride]), temp0); + + temp6 = _mm_add_epi16(temp1, pred_r2); + temp1 = _mm_srli_si128(temp1, 8); + _mm_storel_epi64((__m128i *) (&pi2_res[3 * i4_res_stride]), temp1); + + temp5 = _mm_add_epi16(temp0, pred_r1); + temp7 = _mm_add_epi16(temp1, pred_r3); + + temp4 = _mm_cvtepi16_epi32(temp4); + temp5 = _mm_cvtepi16_epi32(temp5); + temp6 = _mm_cvtepi16_epi32(temp6); + temp7 = _mm_cvtepi16_epi32(temp7); + + /* 32-bit to 16-bit conversion */ + temp0 = _mm_packs_epi32(temp4, temp5); + temp1 = _mm_packs_epi32(temp6, temp7); + /*------------------------------------------------------------------*/ + /* Clipping the results to 8 bits */ + sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b); + temp0 = _mm_and_si128(temp0, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); + temp1 = _mm_and_si128(temp1, sign_reg); + + resq_r0 = _mm_packus_epi16(temp0, temp1); + resq_r1 = _mm_srli_si128(resq_r0, 4); + resq_r2 = _mm_srli_si128(resq_r1, 4); + resq_r3 = _mm_srli_si128(resq_r2, 4); + + *pu4_out = _mm_cvtsi128_si32(resq_r0); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r1); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r2); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r3); +} + +void isvc_iquant_itrans_recon_res_4x4_with_res_acc_sse42( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + WORD16 *pi2_tmp_ptr = pi2_tmp; + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 
*pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + UWORD32 *pu4_out = (UWORD32 *) pu1_out; + __m128i src_r0_r1, src_r2_r3; + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i scalemat_r0_r1, scalemat_r2_r3; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i res_pred_r0, res_pred_r1, res_pred_r2, res_pred_r3; + __m128i res_r0, res_r1, res_r2, res_r3; + __m128i sign_reg, dequant_r0_r1, dequant_r2_r3; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX)); + __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX)); + __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + __m128i resq_r0, resq_r1, resq_r2, resq_r3; + __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0); + __m128i value_32 = _mm_set1_epi32(32); + + ASSERT(4 == i4_src_stride); + ASSERT(1 == u1_res_accumulate); + + UNUSED(i4_src_stride); + UNUSED(ps_res_pred); + UNUSED(u1_res_accumulate); + + /*************************************************************/ + /* Dequantization of coefficients. 
Will be replaced by SIMD */ + /* operations on platform */ + /*************************************************************/ + + /* a00 a01 a02 a03 a10 a11 a12 a13 -- the source + matrix 0th,1st row */ + src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); + + /* a20 a21 a22 a23 a30 a31 a32 a33 -- the + source matrix 2nd,3rd row */ + src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); + + /* b00 b01 b02 b03 b10 b11 b12 b13 -- the + scaling matrix 0th,1st row */ + scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); + + /* b20 b21 b22 b23 b30 b31 b32 b33 -- the + scaling matrix 2nd,3rd row */ + scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); + + /* q00 q01 q02 q03 q10 q11 + q12 q13 -- all 16 bits */ + dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); + + /* q20 q21 q22 q23 q30 q31 + q32 q33 -- all 16 bits */ + dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); + + /* b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 + b12*q12 b13*q13 -- 16 bit result */ + temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); + + /* b20*q20 b21*q21 b22*q22 b23*q23 b30*q30 b31*q31 + b32*q32 b33*q33 -- 16 bit result */ + temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); + + /* b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long */ + temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); + + /* b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long */ + temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); + + /* b20*q20 0 b21*q21 0 b22*q22 0 b23*q23 0 -- 16 bit long */ + temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); + + /* b30*q30 0 b31*q31 0 b32*q32 0 b33*q33 0 -- 16 bit long */ + temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); + + /* a00 0 a01 0 a02 0 a03 0 -- 16 bit long */ + src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); + /* a10 0 a11 0 a12 0 a13 0 -- 16 bit long */ + src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); + /* a20 0 a21 0 a22 0 a23 0 -- 16 bit long */ + src_r2 = _mm_unpacklo_epi16(src_r2_r3,
zero_8x16b); + /* a30 0 a31 0 a32 0 a33 0 -- 16 bit long */ + src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); + + temp4 = _mm_madd_epi16(src_r0, temp4); + temp5 = _mm_madd_epi16(src_r1, temp5); + temp6 = _mm_madd_epi16(src_r2, temp6); + temp7 = _mm_madd_epi16(src_r3, temp7); + + if(u4_qp_div_6 >= 4) + { + resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4); + resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4); + resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4); + resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4); + } + else + { + temp4 = _mm_add_epi32(temp4, add_rshift); + temp5 = _mm_add_epi32(temp5, add_rshift); + temp6 = _mm_add_epi32(temp6, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6); + resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6); + resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6); + resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6); + } + + if(i4_iq_start_idx == 1) resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_src[0], 0); + /* Perform Inverse transform */ + /*-------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + + /* a0 b0 a1 b1 */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); + /* c0 d0 c1 d1 */ + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); + /* a2 b2 a3 b3 */ + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); + /* c2 d2 c3 d3 */ + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); + /* a0 b0 c0 d0 */ + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); + /* a1 b1 c1 d1 */ + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); + /* a2 b2 c2 d2 */ + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); + /* a3 b3 c3 d3 */ + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); + /* Transform starts -- horizontal transform */ + /*------------------------------------------------------------------*/ + /* z0 = w0 + w2 
*/ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1 = w0 - w2 */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2 = (w1 >> 1) - w3 */ + temp2 = _mm_srai_epi32(resq_r1, 1); + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3 = w1 + (w3 >> 1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); + temp3 = _mm_add_epi32(temp3, resq_r1); + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + resq_r0 = _mm_add_epi32(temp0, temp3); + /* x1 = z1 + z2 */ + resq_r1 = _mm_add_epi32(temp1, temp2); + /* x2 = z1 - z2 */ + resq_r2 = _mm_sub_epi32(temp1, temp2); + /* x3 = z0 - z3 */ + resq_r3 = _mm_sub_epi32(temp0, temp3); + + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + + /* a0 a1 b0 b1 */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); + /* a2 a3 b2 b3 */ + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); + /* c0 c1 d0 d1 */ + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); + /* c2 c3 d2 d3 */ + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); + /* a0 a1 a2 a3 */ + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); + /* b0 b1 b2 b3 */ + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); + /* c0 c1 c2 c3 */ + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); + /* d0 d1 d2 d3 */ + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); + /* Transform ends -- horizontal transform */ + + temp0 = _mm_packs_epi32(resq_r0, resq_r1); + temp1 = _mm_packs_epi32(resq_r2, resq_r3); + + _mm_storeu_si128((__m128i *) (&pi2_tmp_ptr[0]), temp0); + _mm_storeu_si128((__m128i *) (&pi2_tmp_ptr[2 * 4]), temp1); + + /* Load pred buffer */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + pred_r2 = _mm_cvtepu8_epi16(pred_r2); + pred_r3 = _mm_cvtepu8_epi16(pred_r3); + 
+ /*--------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to same buffer */ + /*--------------------------------------------------------------*/ + /* z0j = y0j + y2j */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1j = y0j - y2j */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2j = (y1j>>1) - y3j */ + temp2 = _mm_srai_epi32(resq_r1, 1); + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3j = y1j + (y3j>>1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); + temp3 = _mm_add_epi32(temp3, resq_r1); + + /* x0j = z0j + z3j */ + temp4 = _mm_add_epi32(temp0, temp3); + temp4 = _mm_add_epi32(temp4, value_32); + temp4 = _mm_srai_epi32(temp4, 6); + res_r0 = temp4; + /* x1j = z1j + z2j */ + temp5 = _mm_add_epi32(temp1, temp2); + temp5 = _mm_add_epi32(temp5, value_32); + temp5 = _mm_srai_epi32(temp5, 6); + res_r1 = temp5; + /* x2j = z1j - z2j */ + temp6 = _mm_sub_epi32(temp1, temp2); + temp6 = _mm_add_epi32(temp6, value_32); + temp6 = _mm_srai_epi32(temp6, 6); + res_r2 = temp6; + /* x3j = z0j - z3j */ + temp7 = _mm_sub_epi32(temp0, temp3); + temp7 = _mm_add_epi32(temp7, value_32); + temp7 = _mm_srai_epi32(temp7, 6); + res_r3 = temp7; + + /* Accumulating res */ + res_pred_r0 = _mm_loadl_epi64((__m128i *) &pi2_res_pred[0]); + res_pred_r1 = _mm_loadl_epi64((__m128i *) &pi2_res_pred[i4_res_pred_stride]); + res_pred_r2 = _mm_loadl_epi64((__m128i *) &pi2_res_pred[2 * i4_res_pred_stride]); + res_pred_r3 = _mm_loadl_epi64((__m128i *) &pi2_res_pred[3 * i4_res_pred_stride]); + + res_pred_r0 = _mm_cvtepi16_epi32(res_pred_r0); + res_pred_r1 = _mm_cvtepi16_epi32(res_pred_r1); + res_pred_r2 = _mm_cvtepi16_epi32(res_pred_r2); + res_pred_r3 = _mm_cvtepi16_epi32(res_pred_r3); + + temp0 = _mm_add_epi32(res_r0, res_pred_r0); + temp1 = _mm_add_epi32(res_r1, res_pred_r1); + temp2 = _mm_add_epi32(res_r2, res_pred_r2); + temp3 = _mm_add_epi32(res_r3, res_pred_r3); + + temp0 = 
_mm_packs_epi32(temp0, temp1); + temp1 = _mm_packs_epi32(temp2, temp3); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp0 = _mm_max_epi16(temp0, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp0 = _mm_min_epi16(temp0, pos_255_8x16b); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp1 = _mm_max_epi16(temp1, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp1 = _mm_min_epi16(temp1, pos_255_8x16b); + + _mm_storel_epi64((__m128i *) (&pi2_res[0]), temp0); + _mm_storel_epi64((__m128i *) (&pi2_res[2 * i4_res_stride]), temp1); + + temp4 = _mm_add_epi16(temp0, pred_r0); + temp0 = _mm_srli_si128(temp0, 8); + _mm_storel_epi64((__m128i *) (&pi2_res[i4_res_stride]), temp0); + + temp6 = _mm_add_epi16(temp1, pred_r2); + temp1 = _mm_srli_si128(temp1, 8); + _mm_storel_epi64((__m128i *) (&pi2_res[3 * i4_res_stride]), temp1); + + temp5 = _mm_add_epi16(temp0, pred_r1); + temp7 = _mm_add_epi16(temp1, pred_r3); + + temp4 = _mm_cvtepi16_epi32(temp4); + temp5 = _mm_cvtepi16_epi32(temp5); + temp6 = _mm_cvtepi16_epi32(temp6); + temp7 = _mm_cvtepi16_epi32(temp7); + + /* 32-bit to 16-bit conversion */ + temp0 = _mm_packs_epi32(temp4, temp5); + temp1 = _mm_packs_epi32(temp6, temp7); + /*------------------------------------------------------------------*/ + /* Clipping the results to 8 bits */ + sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b); + temp0 = _mm_and_si128(temp0, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); + temp1 = _mm_and_si128(temp1, sign_reg); + + resq_r0 = _mm_packus_epi16(temp0, temp1); + resq_r1 = _mm_srli_si128(resq_r0, 4); + resq_r2 = _mm_srli_si128(resq_r1, 4); + resq_r3 = _mm_srli_si128(resq_r2, 4); + + *pu4_out = _mm_cvtsi128_si32(resq_r0); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r1); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + 
*(pu4_out) = _mm_cvtsi128_si32(resq_r2); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r3); +} + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized chroma + * residue and prediction buffer + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed. + * This inverse transformed content is added to the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_src + * quantized 4x4 block + * + * @param[in] pu1_pred + * prediction 4x4 block + * + * @param[out] pu1_out + * reconstructed 4x4 block + * + * @param[in] src_strd + * quantization buffer stride + * + * @param[in] i4_pred_stride + * Prediction buffer stride + * + * @param[in] i4_out_stride + * recon buffer Stride + * + * @param[in] pu2_scaling_list + * pointer to scaling list + * + * @param[in] pu2_norm_adjust + * pointer to inverse scale matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi2_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void isvc_iquant_itrans_recon_chroma_4x4_sse42( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat =
ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + __m128i src_r0_r1, src_r2_r3; + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i scalemat_r0_r1, scalemat_r2_r3; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i sign_reg, dequant_r0_r1, dequant_r2_r3; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX)); + __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX)); + __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + __m128i resq_r0, resq_r1, resq_r2, resq_r3; + __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0); + __m128i value_32 = _mm_set1_epi32(32); + __m128i chroma_mask = _mm_set1_epi16(0xFF); + __m128i out_r0, out_r1, out_r2, out_r3; + + ASSERT(4 == i4_src_stride); + ASSERT(0 == u1_res_accumulate); + + UNUSED(i4_src_stride); + UNUSED(u1_res_accumulate); + UNUSED(ps_res); + UNUSED(ps_res_pred); + UNUSED(i4_iq_start_idx); + + /*************************************************************/ + /* Dequantization of coefficients. 
Will be replaced by SIMD */ + /* operations on platform */ + /*************************************************************/ + /* a00 a01 a02 a03 a10 a11 a12 a13 -- the source + matrix 0th,1st row */ + src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); + + /* a20 a21 a22 a23 a30 a31 a32 a33 -- the + source matrix 2nd,3rd row */ + src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); + + /* b00 b01 b02 b03 b10 b11 b12 b13 -- the + scaling matrix 0th,1st row */ + scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); + + /* b20 b21 b22 b23 b30 b31 b32 b33 -- the + scaling matrix 2nd,3rd row */ + scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); + + /* q00 q01 q02 q03 q10 q11 + q12 q13 -- all 16 bits */ + dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); + + /* q20 q21 q22 q23 q30 q31 + q32 q33 -- all 16 bits */ + dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); + + temp0 = _mm_mullo_epi16(scalemat_r0_r1, + dequant_r0_r1); // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 + // b12*q12 b13*q13 -- 16 bit result + + temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); + + /* b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long */ + temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); + + /* b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long */ + temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); + + /* b20*q20 0 b21*q21 0 b22*q22 0 b23*q23 0 -- 16 bit long */ + temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); + + /* b30*q30 0 b31*q31 0 b32*q32 0 b33*q33 0 -- 16 bit long */ + temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); + + /* a00 0 a01 0 a02 0 a03 0 -- 16 bit long */ + src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); + /* a10 0 a11 0 a12 0 a13 0 -- 16 bit long */ + src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); + /* a20 0 a21 0 a22 0 a23 0 -- 16 bit long */ + src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); + /* a30 0 a31 0 a32 0 a33 0 -- 16 bit long */ + src_r3 =
_mm_unpackhi_epi16(src_r2_r3, zero_8x16b); + + temp4 = _mm_madd_epi16(src_r0, temp4); + temp5 = _mm_madd_epi16(src_r1, temp5); + temp6 = _mm_madd_epi16(src_r2, temp6); + temp7 = _mm_madd_epi16(src_r3, temp7); + + if(u4_qp_div_6 >= 4) + { + resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4); + resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4); + resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4); + resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4); + } + else + { + temp4 = _mm_add_epi32(temp4, add_rshift); + temp5 = _mm_add_epi32(temp5, add_rshift); + temp6 = _mm_add_epi32(temp6, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6); + resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6); + resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6); + resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6); + } + + resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_src[0], 0); + /* Perform Inverse transform */ + /*-------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + /* a0 b0 a1 b1 */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); + /* c0 d0 c1 d1 */ + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); + /* a2 b2 a3 b3 */ + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); + /* c2 d2 c3 d3 */ + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); + /* a0 b0 c0 d0 */ + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); + /* a1 b1 c1 d1 */ + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); + /* a2 b2 c2 d2 */ + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); + /* a3 b3 c3 d3 */ + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); + /* Transform starts -- horizontal transform */ + + /*------------------------------------------------------------------*/ + /* z0 = w0 + w2 */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1 = w0 - w2 */ + temp1 = 
_mm_sub_epi32(resq_r0, resq_r2); + /* z2 = (w1 >> 1) - w3 */ + temp2 = _mm_srai_epi32(resq_r1, 1); + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3 = w1 + (w3 >> 1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); //(w3>>1) + w1 + temp3 = _mm_add_epi32(temp3, resq_r1); + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + resq_r0 = _mm_add_epi32(temp0, temp3); + /* x1 = z1 + z2 */ + resq_r1 = _mm_add_epi32(temp1, temp2); + /* x2 = z1 - z2 */ + resq_r2 = _mm_sub_epi32(temp1, temp2); + /* x3 = z0 - z3 */ + resq_r3 = _mm_sub_epi32(temp0, temp3); + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + /* a0 a1 b0 b1 */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); + /* a2 a3 b2 b3 */ + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); + /* c0 c1 d0 d1 */ + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); + /* c2 c3 d2 d3 */ + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); + /* a0 a1 a2 a3 */ + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); + /* b0 b1 b2 b3 */ + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); + /* c0 c1 c2 c3 */ + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); + /* d0 d1 d2 d3 */ + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); + /* Transform ends -- horizontal transform */ + + temp0 = _mm_packs_epi32(resq_r0, resq_r1); + temp1 = _mm_packs_epi32(resq_r2, resq_r3); + + _mm_storeu_si128((__m128i *) (&pi2_tmp[0]), temp0); + _mm_storeu_si128((__m128i *) (&pi2_tmp[2 * 4]), temp1); + + /* Load pred buffer */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + pred_r0 = _mm_and_si128(pred_r0, chroma_mask); + pred_r1 = _mm_and_si128(pred_r1, chroma_mask); + pred_r2 = _mm_and_si128(pred_r2, chroma_mask); + pred_r3 = _mm_and_si128(pred_r3, chroma_mask); + + pred_r0 = 
_mm_unpacklo_epi64(pred_r0, pred_r1); + pred_r1 = _mm_unpacklo_epi64(pred_r2, pred_r3); + + /*--------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to same buffer */ + /*--------------------------------------------------------------*/ + /* z0j = y0j + y2j */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1j = y0j - y2j */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2j = (y1j>>1) - y3j */ + temp2 = _mm_srai_epi32(resq_r1, 1); + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3j = y1j + (y3j>>1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); + temp3 = _mm_add_epi32(temp3, resq_r1); + + /* x0j = z0j + z3j */ + temp4 = _mm_add_epi32(temp0, temp3); + temp4 = _mm_add_epi32(temp4, value_32); + temp4 = _mm_srai_epi32(temp4, 6); + /* x1j = z1j + z2j */ + temp5 = _mm_add_epi32(temp1, temp2); + temp5 = _mm_add_epi32(temp5, value_32); + temp5 = _mm_srai_epi32(temp5, 6); + /* x2j = z1j - z2j */ + temp6 = _mm_sub_epi32(temp1, temp2); + temp6 = _mm_add_epi32(temp6, value_32); + temp6 = _mm_srai_epi32(temp6, 6); + /* x3j = z0j - z3j */ + temp7 = _mm_sub_epi32(temp0, temp3); + temp7 = _mm_add_epi32(temp7, value_32); + temp7 = _mm_srai_epi32(temp7, 6); + + /* 32-bit to 16-bit conversion */ + temp0 = _mm_packs_epi32(temp4, temp5); + temp1 = _mm_packs_epi32(temp6, temp7); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp4 = _mm_max_epi16(temp0, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp4 = _mm_min_epi16(temp4, pos_255_8x16b); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp5 = _mm_max_epi16(temp1, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp5 = _mm_min_epi16(temp5, pos_255_8x16b); + + temp0 = _mm_add_epi16(temp4, pred_r0); + temp1 = _mm_add_epi16(temp5, pred_r1); + + 
/*------------------------------------------------------------------*/ + /* Clipping the results to 8 bits */ + sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b); + temp0 = _mm_and_si128(temp0, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); + temp1 = _mm_and_si128(temp1, sign_reg); + + resq_r0 = _mm_packus_epi16(temp0, temp1); + resq_r1 = _mm_srli_si128(resq_r0, 4); + resq_r2 = _mm_srli_si128(resq_r1, 4); + resq_r3 = _mm_srli_si128(resq_r2, 4); + + resq_r0 = _mm_cvtepu8_epi16(resq_r0); + resq_r1 = _mm_cvtepu8_epi16(resq_r1); + resq_r2 = _mm_cvtepu8_epi16(resq_r2); + resq_r3 = _mm_cvtepu8_epi16(resq_r3); + + chroma_mask = _mm_set1_epi16(0xFF00); + out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0])); + out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_out_stride])); + out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride])); + out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride])); + + out_r0 = _mm_and_si128(out_r0, chroma_mask); + out_r1 = _mm_and_si128(out_r1, chroma_mask); + out_r2 = _mm_and_si128(out_r2, chroma_mask); + out_r3 = _mm_and_si128(out_r3, chroma_mask); + + out_r0 = _mm_add_epi8(out_r0, resq_r0); + out_r1 = _mm_add_epi8(out_r1, resq_r1); + out_r2 = _mm_add_epi8(out_r2, resq_r2); + out_r3 = _mm_add_epi8(out_r3, resq_r3); + + _mm_storel_epi64((__m128i *) (&pu1_out[0]), out_r0); + _mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), out_r1); + _mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2); + _mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3); +} + +void isvc_iquant_itrans_recon_res_chroma_4x4_sse42( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + WORD16 *pi2_res = (WORD16 *) 
ps_res->pv_data; + WORD16 *pi2_res_ptr = pi2_res; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + __m128i src_r0_r1, src_r2_r3; + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i scalemat_r0_r1, scalemat_r2_r3; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i sign_reg, dequant_r0_r1, dequant_r2_r3; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX)); + __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX)); + __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + __m128i resq_r0, resq_r1, resq_r2, resq_r3; + __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0); + __m128i value_32 = _mm_set1_epi32(32); + __m128i chroma_mask = _mm_set1_epi16(0xFF); + __m128i out_r0, out_r1, out_r2, out_r3; + __m128i res_r0, res_r1, res_r2, res_r3; + + ASSERT(4 == i4_src_stride); + ASSERT(0 == u1_res_accumulate); + + UNUSED(i4_src_stride); + UNUSED(u1_res_accumulate); + UNUSED(ps_res_pred); + UNUSED(i4_iq_start_idx); + + /*************************************************************/ + /* Dequantization of coefficients. 
Will be replaced by SIMD */ + /* operations on platform */ + /*************************************************************/ + /* a00 a01 a02 a03 a10 a11 a12 a13 -- the source + matrix 0th,1st row */ + src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); + + /* a20 a21 a22 a23 a30 a31 a32 a33 -- the + source matrix 2nd,3rd row */ + src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); + + /* b00 b01 b02 b03 b10 b11 b12 b13 -- the + scaling matrix 0th,1st row */ + scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); + + /* b20 b21 b22 b23 b30 b31 b32 b33 -- the + scaling matrix 2nd,3rd row */ + scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); + + /* q00 q01 q02 q03 q10 q11 + q12 q13 -- all 16 bits */ + dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); + + /* q20 q21 q22 q23 q30 q31 + q32 q33 -- all 16 bits */ + dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); + + temp0 = _mm_mullo_epi16(scalemat_r0_r1, + dequant_r0_r1); // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 + // b12*q12 b13*q13 -- 16 bit result + + temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); + + /* b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long */ + temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); + + /* b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long */ + temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); + + /* b20*q20 0 b21*q21 0 b22*q22 0 b23*q23 0 -- 16 bit long */ + temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); + + /* b30*q30 0 b31*q31 0 b32*q32 0 b33*q33 0 -- 16 bit long */ + temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); + + /* a00 0 a01 0 a02 0 a03 0 -- 16 bit long */ + src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); + /* a10 0 a11 0 a12 0 a13 0 -- 16 bit long */ + src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); + /* a20 0 a21 0 a22 0 a23 0 -- 16 bit long */ + src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); + /* a30 0 a31 0 a32 0 a33 0 -- 16 bit long */ + src_r3 = 
_mm_unpackhi_epi16(src_r2_r3, zero_8x16b); + + temp4 = _mm_madd_epi16(src_r0, temp4); + temp5 = _mm_madd_epi16(src_r1, temp5); + temp6 = _mm_madd_epi16(src_r2, temp6); + temp7 = _mm_madd_epi16(src_r3, temp7); + + if(u4_qp_div_6 >= 4) + { + resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4); + resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4); + resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4); + resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4); + } + else + { + temp4 = _mm_add_epi32(temp4, add_rshift); + temp5 = _mm_add_epi32(temp5, add_rshift); + temp6 = _mm_add_epi32(temp6, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6); + resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6); + resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6); + resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6); + } + + resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_src[0], 0); + /* Perform Inverse transform */ + /*-------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + /* a0 b0 a1 b1 */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); + /* c0 d0 c1 d1 */ + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); + /* a2 b2 a3 b3 */ + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); + /* c2 d2 c3 d3 */ + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); + /* a0 b0 c0 d0 */ + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); + /* a1 b1 c1 d1 */ + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); + /* a2 b2 c2 d2 */ + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); + /* a3 b3 c3 d3 */ + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); + /* Transform starts -- horizontal transform */ + + /*------------------------------------------------------------------*/ + /* z0 = w0 + w2 */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1 = w0 - w2 */ + temp1 = 
_mm_sub_epi32(resq_r0, resq_r2); + /* z2 = (w1 >> 1) - w3 */ + temp2 = _mm_srai_epi32(resq_r1, 1); + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3 = w1 + (w3 >> 1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); + temp3 = _mm_add_epi32(temp3, resq_r1); + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + resq_r0 = _mm_add_epi32(temp0, temp3); + /* x1 = z1 + z2 */ + resq_r1 = _mm_add_epi32(temp1, temp2); + /* x2 = z1 - z2 */ + resq_r2 = _mm_sub_epi32(temp1, temp2); + /* x3 = z0 - z3 */ + resq_r3 = _mm_sub_epi32(temp0, temp3); + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + /* a0 a1 b0 b1 */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); + /* a2 a3 b2 b3 */ + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); + /* c0 c1 d0 d1 */ + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); + /* c2 c3 d2 d3 */ + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); + /* a0 a1 a2 a3 */ + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); + /* b0 b1 b2 b3 */ + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); + /* c0 c1 c2 c3 */ + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); + /* d0 d1 d2 d3 */ + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); + /* Transform ends -- horizontal transform */ + + temp0 = _mm_packs_epi32(resq_r0, resq_r1); + temp1 = _mm_packs_epi32(resq_r2, resq_r3); + + _mm_storeu_si128((__m128i *) (&pi2_tmp[0]), temp0); + _mm_storeu_si128((__m128i *) (&pi2_tmp[2 * 4]), temp1); + + /* Load pred buffer */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + pred_r0 = _mm_and_si128(pred_r0, chroma_mask); + pred_r1 = _mm_and_si128(pred_r1, chroma_mask); + pred_r2 = _mm_and_si128(pred_r2, chroma_mask); + pred_r3 = _mm_and_si128(pred_r3, chroma_mask); + + pred_r0 = _mm_cvtepu16_epi32(pred_r0); + pred_r1 = 
_mm_cvtepu16_epi32(pred_r1); + pred_r2 = _mm_cvtepu16_epi32(pred_r2); + pred_r3 = _mm_cvtepu16_epi32(pred_r3); + + /*--------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to same buffer */ + /*--------------------------------------------------------------*/ + /* z0j = y0j + y2j */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1j = y0j - y2j */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2j = (y1j>>1) - y3j */ + temp2 = _mm_srai_epi32(resq_r1, 1); + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3j = y1j + (y3j>>1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); + temp3 = _mm_add_epi32(temp3, resq_r1); + + /* x0j = z0j + z3j */ + temp4 = _mm_add_epi32(temp0, temp3); + temp4 = _mm_add_epi32(temp4, value_32); + temp4 = _mm_srai_epi32(temp4, 6); + /* x1j = z1j + z2j */ + temp5 = _mm_add_epi32(temp1, temp2); + temp5 = _mm_add_epi32(temp5, value_32); + temp5 = _mm_srai_epi32(temp5, 6); + /* x2j = z1j - z2j */ + temp6 = _mm_sub_epi32(temp1, temp2); + temp6 = _mm_add_epi32(temp6, value_32); + temp6 = _mm_srai_epi32(temp6, 6); + /* x3j = z0j - z3j */ + temp7 = _mm_sub_epi32(temp0, temp3); + temp7 = _mm_add_epi32(temp7, value_32); + temp7 = _mm_srai_epi32(temp7, 6); + + /* 32-bit to 16-bit conversion */ + temp0 = _mm_packs_epi32(temp4, temp5); + temp1 = _mm_packs_epi32(temp6, temp7); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp0 = _mm_max_epi16(temp0, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp0 = _mm_min_epi16(temp0, pos_255_8x16b); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp1 = _mm_max_epi16(temp1, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp1 = _mm_min_epi16(temp1, pos_255_8x16b); + + chroma_mask = _mm_set1_epi32(0xffff0000); + out_r0 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[0 * 
i4_res_stride])); + out_r1 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[1 * i4_res_stride])); + out_r2 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride])); + out_r3 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride])); + + out_r0 = _mm_and_si128(out_r0, chroma_mask); + out_r1 = _mm_and_si128(out_r1, chroma_mask); + out_r2 = _mm_and_si128(out_r2, chroma_mask); + out_r3 = _mm_and_si128(out_r3, chroma_mask); + + res_r0 = _mm_cvtepu16_epi32(temp0); + res_r2 = _mm_cvtepu16_epi32(temp1); + res_r1 = _mm_srli_si128(temp0, 8); + res_r3 = _mm_srli_si128(temp1, 8); + res_r1 = _mm_cvtepu16_epi32(res_r1); + res_r3 = _mm_cvtepu16_epi32(res_r3); + + out_r0 = _mm_add_epi16(out_r0, res_r0); + out_r1 = _mm_add_epi16(out_r1, res_r1); + out_r2 = _mm_add_epi16(out_r2, res_r2); + out_r3 = _mm_add_epi16(out_r3, res_r3); + + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[0 * i4_res_stride]), out_r0); + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[1 * i4_res_stride]), out_r1); + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride]), out_r2); + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride]), out_r3); + + resq_r0 = _mm_add_epi16(pred_r0, res_r0); + resq_r1 = _mm_add_epi16(pred_r1, res_r1); + resq_r2 = _mm_add_epi16(pred_r2, res_r2); + resq_r3 = _mm_add_epi16(pred_r3, res_r3); + + temp0 = _mm_packus_epi32(resq_r0, resq_r1); + temp1 = _mm_packus_epi32(resq_r2, resq_r3); + + /*------------------------------------------------------------------*/ + /* Clipping the results to 8 bits */ + sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b); + temp0 = _mm_and_si128(temp0, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); + temp1 = _mm_and_si128(temp1, sign_reg); + + resq_r0 = _mm_packus_epi16(temp0, temp1); + resq_r1 = _mm_srli_si128(resq_r0, 4); + resq_r2 = _mm_srli_si128(resq_r1, 4); + resq_r3 = _mm_srli_si128(resq_r2, 4); + + resq_r0 = _mm_cvtepu8_epi16(resq_r0); + resq_r1 = _mm_cvtepu8_epi16(resq_r1); + resq_r2 = _mm_cvtepu8_epi16(resq_r2); + 
resq_r3 = _mm_cvtepu8_epi16(resq_r3); + + chroma_mask = _mm_set1_epi16(0xff00); + out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0])); + out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_out_stride])); + out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride])); + out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride])); + + out_r0 = _mm_and_si128(out_r0, chroma_mask); + out_r1 = _mm_and_si128(out_r1, chroma_mask); + out_r2 = _mm_and_si128(out_r2, chroma_mask); + out_r3 = _mm_and_si128(out_r3, chroma_mask); + + out_r0 = _mm_add_epi8(out_r0, resq_r0); + out_r1 = _mm_add_epi8(out_r1, resq_r1); + out_r2 = _mm_add_epi8(out_r2, resq_r2); + out_r3 = _mm_add_epi8(out_r3, resq_r3); + + _mm_storel_epi64((__m128i *) (&pu1_out[0]), out_r0); + _mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), out_r1); + _mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2); + _mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3); +} + +void isvc_iquant_itrans_recon_res_chroma_4x4_with_res_acc_sse42( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = 
ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + __m128i src_r0_r1, src_r2_r3; + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i scalemat_r0_r1, scalemat_r2_r3; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i res_pred_r0, res_pred_r1, res_pred_r2, res_pred_r3; + __m128i res_r0, res_r1, res_r2, res_r3; + __m128i dequant_r0_r1, dequant_r2_r3; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i reg_chroma = _mm_set1_epi32(0xFFFF); + __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX)); + __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX)); + __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + __m128i resq_r0, resq_r1, resq_r2, resq_r3; + __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0); + __m128i value_32 = _mm_set1_epi32(32); + __m128i chroma_mask = _mm_set1_epi16(0xFF); + __m128i out_r0, out_r1, out_r2, out_r3; + __m128i mask_r0; + + ASSERT(4 == i4_src_stride); + ASSERT(1 == u1_res_accumulate); + + UNUSED(i4_src_stride); + UNUSED(u1_res_accumulate); + UNUSED(i4_iq_start_idx); + + /*************************************************************/ + /* Dequantization of coefficients. 
Will be replaced by SIMD */ + /* operations on platform */ + /*************************************************************/ + /* a00 a01 a02 a03 a10 a11 a12 a13 -- the source + matrix 0th,1st row */ + src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); + + /* a20 a21 a22 a23 a30 a31 a32 a33 -- the + source matrix 2nd,3rd row */ + src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); + + /* b00 b01 b02 b03 b10 b11 b12 b13 -- the + scaling matrix 0th,1st row */ + scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); + + /* b20 b21 b22 b23 b30 b31 b32 b33 -- the + scaling matrix 2nd,3rd row */ + scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); + + /* q00 q01 q02 q03 q10 q11 + q12 q13 -- all 16 bits */ + dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); + + /* q20 q21 q22 q23 q30 q31 + q32 q33 -- all 16 bits */ + dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); + + temp0 = _mm_mullo_epi16(scalemat_r0_r1, + dequant_r0_r1); // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 + // b12*q12 b13*q13 -- 16 bit result + + temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); + + /* b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long */ + temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); + + /* b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long */ + temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); + + /* b20*q20 0 b21*q21 0 b22*q22 0 b23*q23 0 -- 16 bit long */ + temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); + + /* b30*q30 0 b31*q31 0 b32*q32 0 b33*q33 0 -- 16 bit long */ + temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); + + /* a00 0 a01 0 a02 0 a03 0 -- 16 bit long */ + src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); + /* a10 0 a11 0 a12 0 a13 0 -- 16 bit long */ + src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); + /* a20 0 a21 0 a22 0 a23 0 -- 16 bit long */ + src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); + /* a30 0 a31 0 a32 0 a33 0 -- 16 bit long */ + src_r3 = 
_mm_unpackhi_epi16(src_r2_r3, zero_8x16b); + + temp4 = _mm_madd_epi16(src_r0, temp4); + temp5 = _mm_madd_epi16(src_r1, temp5); + temp6 = _mm_madd_epi16(src_r2, temp6); + temp7 = _mm_madd_epi16(src_r3, temp7); + + if(u4_qp_div_6 >= 4) + { + resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4); + resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4); + resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4); + resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4); + } + else + { + temp4 = _mm_add_epi32(temp4, add_rshift); + temp5 = _mm_add_epi32(temp5, add_rshift); + temp6 = _mm_add_epi32(temp6, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6); + resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6); + resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6); + resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6); + } + + resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_src[0], 0); + /* Perform Inverse transform */ + /*-------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + /* a0 b0 a1 b1 */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); + /* c0 d0 c1 d1 */ + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); + /* a2 b2 a3 b3 */ + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); + /* c2 d2 c3 d3 */ + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); + /* a0 b0 c0 d0 */ + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); + /* a1 b1 c1 d1 */ + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); + /* a2 b2 c2 d2 */ + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); + /* a3 b3 c3 d3 */ + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); + /* Transform starts -- horizontal transform */ + + /*------------------------------------------------------------------*/ + /* z0 = w0 + w2 */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1 = w0 - w2 */ + temp1 = 
_mm_sub_epi32(resq_r0, resq_r2); + /* z2 = (w1 >> 1) - w3 */ + temp2 = _mm_srai_epi32(resq_r1, 1); + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3 = w1 + (w3 >> 1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); //(w3>>1) + w1 + temp3 = _mm_add_epi32(temp3, resq_r1); + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + resq_r0 = _mm_add_epi32(temp0, temp3); + /* x1 = z1 + z2 */ + resq_r1 = _mm_add_epi32(temp1, temp2); + /* x2 = z1 - z2 */ + resq_r2 = _mm_sub_epi32(temp1, temp2); + /* x3 = z0 - z3 */ + resq_r3 = _mm_sub_epi32(temp0, temp3); + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + /* a0 a1 b0 b1 */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); + /* a2 a3 b2 b3 */ + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); + /* c0 c1 d0 d1 */ + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); + /* c2 c3 d2 d3 */ + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); + /* a0 a1 a2 a3 */ + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); + /* b0 b1 b2 b3 */ + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); + /* c0 c1 c2 c3 */ + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); + /* d0 d1 d2 d3 */ + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); + /* Transform ends -- horizontal transform */ + + temp0 = _mm_packs_epi32(resq_r0, resq_r1); + temp1 = _mm_packs_epi32(resq_r2, resq_r3); + + _mm_storeu_si128((__m128i *) (&pi2_tmp[0]), temp0); + _mm_storeu_si128((__m128i *) (&pi2_tmp[2 * 4]), temp1); + + /* Load pred buffer */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + pred_r0 = _mm_and_si128(pred_r0, chroma_mask); + pred_r1 = _mm_and_si128(pred_r1, chroma_mask); + pred_r2 = _mm_and_si128(pred_r2, chroma_mask); + pred_r3 = _mm_and_si128(pred_r3, chroma_mask); + + 
/*--------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to same buffer */ + /*--------------------------------------------------------------*/ + /* z0j = y0j + y2j */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1j = y0j - y2j */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2j = (y1j>>1) - y3j */ + temp2 = _mm_srai_epi32(resq_r1, 1); + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3j = y1j + (y3j>>1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); + temp3 = _mm_add_epi32(temp3, resq_r1); + + /* x0j = z0j + z3j */ + temp4 = _mm_add_epi32(temp0, temp3); + temp4 = _mm_add_epi32(temp4, value_32); + temp4 = _mm_srai_epi32(temp4, 6); + res_r0 = temp4; + /* x1j = z1j + z2j */ + temp5 = _mm_add_epi32(temp1, temp2); + temp5 = _mm_add_epi32(temp5, value_32); + temp5 = _mm_srai_epi32(temp5, 6); + res_r1 = temp5; + /* x2j = z1j - z2j */ + temp6 = _mm_sub_epi32(temp1, temp2); + temp6 = _mm_add_epi32(temp6, value_32); + temp6 = _mm_srai_epi32(temp6, 6); + res_r2 = temp6; + /* x3j = z0j - z3j */ + temp7 = _mm_sub_epi32(temp0, temp3); + temp7 = _mm_add_epi32(temp7, value_32); + temp7 = _mm_srai_epi32(temp7, 6); + res_r3 = temp7; + + res_pred_r0 = _mm_loadu_si128((__m128i *) &pi2_res_pred[0 * i4_res_pred_stride]); + res_pred_r1 = _mm_loadu_si128((__m128i *) &pi2_res_pred[1 * i4_res_pred_stride]); + res_pred_r2 = _mm_loadu_si128((__m128i *) &pi2_res_pred[2 * i4_res_pred_stride]); + res_pred_r3 = _mm_loadu_si128((__m128i *) &pi2_res_pred[3 * i4_res_pred_stride]); + + res_pred_r0 = _mm_and_si128(res_pred_r0, reg_chroma); + res_pred_r1 = _mm_and_si128(res_pred_r1, reg_chroma); + res_pred_r2 = _mm_and_si128(res_pred_r2, reg_chroma); + res_pred_r3 = _mm_and_si128(res_pred_r3, reg_chroma); + + temp0 = _mm_packs_epi32(res_r0, res_r1); + temp1 = _mm_packs_epi32(res_r2, res_r3); + + res_r0 = _mm_cvtepu16_epi32(temp0); + res_r2 = _mm_cvtepu16_epi32(temp1); + res_r1 = 
_mm_srli_si128(temp0, 8); + res_r3 = _mm_srli_si128(temp1, 8); + res_r1 = _mm_cvtepu16_epi32(res_r1); + res_r3 = _mm_cvtepu16_epi32(res_r3); + + res_r0 = _mm_add_epi16(res_pred_r0, res_r0); + res_r1 = _mm_add_epi16(res_pred_r1, res_r1); + res_r2 = _mm_add_epi16(res_pred_r2, res_r2); + res_r3 = _mm_add_epi16(res_pred_r3, res_r3); + + temp0 = _mm_packus_epi32(res_r0, res_r1); + temp1 = _mm_packus_epi32(res_r2, res_r3); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp0 = _mm_max_epi16(temp0, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp0 = _mm_min_epi16(temp0, pos_255_8x16b); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp1 = _mm_max_epi16(temp1, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp1 = _mm_min_epi16(temp1, pos_255_8x16b); + + res_r0 = _mm_cvtepu16_epi32(temp0); + res_r1 = _mm_srli_si128(temp0, 8); + res_r1 = _mm_cvtepu16_epi32(res_r1); + + res_r2 = _mm_cvtepu16_epi32(temp1); + res_r3 = _mm_srli_si128(temp1, 8); + res_r3 = _mm_cvtepu16_epi32(res_r3); + + chroma_mask = _mm_set1_epi32(0xffff0000); + out_r0 = _mm_loadu_si128((__m128i *) (&pi2_res[0 * i4_res_stride])); + out_r1 = _mm_loadu_si128((__m128i *) (&pi2_res[1 * i4_res_stride])); + out_r2 = _mm_loadu_si128((__m128i *) (&pi2_res[2 * i4_res_stride])); + out_r3 = _mm_loadu_si128((__m128i *) (&pi2_res[3 * i4_res_stride])); + + out_r0 = _mm_and_si128(out_r0, chroma_mask); + out_r1 = _mm_and_si128(out_r1, chroma_mask); + out_r2 = _mm_and_si128(out_r2, chroma_mask); + out_r3 = _mm_and_si128(out_r3, chroma_mask); + + out_r0 = _mm_add_epi16(out_r0, res_r0); + out_r1 = _mm_add_epi16(out_r1, res_r1); + out_r2 = _mm_add_epi16(out_r2, res_r2); + out_r3 = _mm_add_epi16(out_r3, res_r3); + + _mm_storeu_si128((__m128i *) (&pi2_res[0 * i4_res_stride]), out_r0); + _mm_storeu_si128((__m128i *) (&pi2_res[1 * i4_res_stride]), out_r1); + _mm_storeu_si128((__m128i *) 
(&pi2_res[2 * i4_res_stride]), out_r2); + _mm_storeu_si128((__m128i *) (&pi2_res[3 * i4_res_stride]), out_r3); + + pred_r0 = _mm_cvtepu16_epi32(pred_r0); + pred_r1 = _mm_cvtepu16_epi32(pred_r1); + pred_r2 = _mm_cvtepu16_epi32(pred_r2); + pred_r3 = _mm_cvtepu16_epi32(pred_r3); + + resq_r0 = _mm_add_epi16(pred_r0, res_r0); + resq_r1 = _mm_add_epi16(pred_r1, res_r1); + resq_r2 = _mm_add_epi16(pred_r2, res_r2); + resq_r3 = _mm_add_epi16(pred_r3, res_r3); + + temp0 = _mm_packus_epi32(resq_r0, resq_r1); + temp1 = _mm_packus_epi32(resq_r2, resq_r3); + + /* Clipping the results to 8 bits */ + mask_r0 = _mm_cmpgt_epi16(temp0, zero_8x16b); + temp0 = _mm_and_si128(temp0, mask_r0); + mask_r0 = _mm_cmpgt_epi16(temp1, zero_8x16b); + temp1 = _mm_and_si128(temp1, mask_r0); + + resq_r0 = _mm_packus_epi16(temp0, temp1); + resq_r1 = _mm_srli_si128(resq_r0, 4); + resq_r2 = _mm_srli_si128(resq_r1, 4); + resq_r3 = _mm_srli_si128(resq_r2, 4); + + resq_r0 = _mm_cvtepu8_epi16(resq_r0); + resq_r1 = _mm_cvtepu8_epi16(resq_r1); + resq_r2 = _mm_cvtepu8_epi16(resq_r2); + resq_r3 = _mm_cvtepu8_epi16(resq_r3); + + chroma_mask = _mm_set1_epi16(0xFF00); + out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0 * i4_out_stride])); + out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[1 * i4_out_stride])); + out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride])); + out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride])); + + out_r0 = _mm_and_si128(out_r0, chroma_mask); + out_r1 = _mm_and_si128(out_r1, chroma_mask); + out_r2 = _mm_and_si128(out_r2, chroma_mask); + out_r3 = _mm_and_si128(out_r3, chroma_mask); + + out_r0 = _mm_add_epi8(out_r0, resq_r0); + out_r1 = _mm_add_epi8(out_r1, resq_r1); + out_r2 = _mm_add_epi8(out_r2, resq_r2); + out_r3 = _mm_add_epi8(out_r3, resq_r3); + + _mm_storel_epi64((__m128i *) (&pu1_out[0 * i4_out_stride]), out_r0); + _mm_storel_epi64((__m128i *) (&pu1_out[1 * i4_out_stride]), out_r1); + _mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2); + 
_mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3); +} + +void isvc_iquant_itrans_recon_dc_4x4_sse42(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, + WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ /* DC-only 4x4 reconstruction: q0 (after INV_QUANT, or pi2_dc_src[0]) is rounded with (q0 + 32) >> 6 and that single value is added to every pixel of the 4x4 prediction; the clipped sum is written to ps_rec. pi2_tmp, ps_res and ps_res_pred are unused. */ + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + UWORD32 *pu4_out = (UWORD32 *) pu1_out; + WORD32 q0 = ((WORD16 *) (ps_src->pv_data))[0]; + WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; + + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i sign_reg; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i temp4, temp5, temp6, temp7; + __m128i value_add; + + ASSERT(0 == u1_res_accumulate); + + UNUSED(pi2_tmp); + UNUSED(ps_res); + UNUSED(ps_res_pred); + UNUSED(u1_res_accumulate); + + INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); + + /* Restoring dc value for intra case: pi2_dc_src[0] replaces q0 when i4_iq_start_idx is non-zero */ + if(i4_iq_start_idx != 0) + { + q0 = pi2_dc_src[0]; + } + + i_macro = ((q0 + 32) >> 6); + + value_add = _mm_set1_epi16(i_macro); + + zero_8x16b = _mm_setzero_si128(); + + /* Load pred buffer */ + + /* p00 p01 p02 p03 x x x x -- all 8 bits; 8 bytes are loaded, only the first 4 survive the unpacklo_epi64 below */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + + /* p10 p11 p12 p13 x x x x -- all 8 bits, x = bytes following the 4-pixel row */ + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + + /* p20 p21 p22 p23 x x x x -- all 8 bits, x = bytes following the 4-pixel row */ + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + +
/* p30 p31 p32 p33 x x x x -- all 8 bits, x = bytes following the 4-pixel row */ + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + pred_r2 = _mm_cvtepu8_epi16(pred_r2); + pred_r3 = _mm_cvtepu8_epi16(pred_r3); + + pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); + pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); + + temp4 = _mm_add_epi16(value_add, pred_r0); + temp5 = _mm_add_epi16(value_add, pred_r2); + /*------------------------------------------------------------------*/ + /* Clipping the results to 8 bits: lanes that are not > 0 are zeroed here, and _mm_packus_epi16 below saturates the rest to at most 255 */ + sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); + temp4 = _mm_and_si128(temp4, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); + temp5 = _mm_and_si128(temp5, sign_reg); + + temp4 = _mm_packus_epi16(temp4, temp5); + temp5 = _mm_srli_si128(temp4, 4); + temp6 = _mm_srli_si128(temp5, 4); + temp7 = _mm_srli_si128(temp6, 4); + + *pu4_out = _mm_cvtsi128_si32(temp4); /* 4 output bytes per row, written through a UWORD32 alias of pu1_out. NOTE(review): presumably relies on the target tolerating unaligned 32-bit stores -- confirm */ + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp5); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp6); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp7); +} + +void isvc_iquant_itrans_recon_res_dc_4x4_sse42( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + WORD16 *pi2_res_ptr = pi2_res; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = 
ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + UWORD32 *pu4_out = (UWORD32 *) pu1_out; + WORD32 q0 = ((WORD16 *) (ps_src->pv_data))[0]; + WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; + + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i sign_reg; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i temp4, temp5, temp6, temp7; + __m128i value_add; + + ASSERT(0 == u1_res_accumulate); + + UNUSED(pi2_tmp); + UNUSED(ps_res_pred); + UNUSED(u1_res_accumulate); + + INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); + + /* Restoring dc value for intra case */ + if(i4_iq_start_idx != 0) q0 = pi2_dc_src[0]; + + i_macro = ((q0 + 32) >> 6); + + value_add = _mm_set1_epi16(isvc_get_residue(i_macro, 0, 0)); + + zero_8x16b = _mm_setzero_si128(); + + /* Load pred buffer */ + + /* p00 p01 p02 p03 0 0 0 0 -- all 8 bits */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + + /* p10 p11 p12 p13 0 0 0 0 -- all 8 bits */ + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + + /* p20 p21 p22 p23 0 0 0 0 -- all 8 bits */ + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + + /* p30 p31 p32 p33 0 0 0 0 -- all 8 bits */ + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + pred_r2 = _mm_cvtepu8_epi16(pred_r2); + pred_r3 = _mm_cvtepu8_epi16(pred_r3); + + pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); + pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); + + temp4 = _mm_add_epi16(value_add, pred_r0); + temp5 = _mm_add_epi16(value_add, pred_r2); + + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[0]), value_add); + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[i4_res_stride]), value_add); + _mm_storeu_si128((__m128i *) 
(&pi2_res_ptr[2 * i4_res_stride]), value_add); + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride]), value_add); + /*------------------------------------------------------------------*/ + /* Clipping the results to 8 bits */ + sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); + temp4 = _mm_and_si128(temp4, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); + temp5 = _mm_and_si128(temp5, sign_reg); + + temp4 = _mm_packus_epi16(temp4, temp5); + temp5 = _mm_srli_si128(temp4, 4); + temp6 = _mm_srli_si128(temp5, 4); + temp7 = _mm_srli_si128(temp6, 4); + + *pu4_out = _mm_cvtsi128_si32(temp4); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp5); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp6); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp7); +} + +void isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + WORD16 *pi2_res_ptr = pi2_res; + WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data; + WORD16 *pi2_res_pred_ptr = pi2_res_pred; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + 
UWORD32 *pu4_out = (UWORD32 *) pu1_out; + WORD32 q0 = ((WORD16 *) (ps_src->pv_data))[0]; + WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; + + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i sign_reg; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i temp4, temp5, temp6, temp7; + __m128i value_add; + __m128i res_pred_r0, res_pred_r1, res_pred_r2, res_pred_r3; + __m128i temp0, temp1; + __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX)); + __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX)); + + ASSERT(1 == u1_res_accumulate); + + UNUSED(pi2_tmp); + UNUSED(u1_res_accumulate); + + INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); + + /* Restoring dc value for intra case */ + if(i4_iq_start_idx != 0) q0 = pi2_dc_src[0]; + + i_macro = ((q0 + 32) >> 6); + + value_add = _mm_set1_epi16(i_macro); + + zero_8x16b = _mm_setzero_si128(); + + /* Load pred buffer */ + + /* p00 p01 p02 p03 0 0 0 0 -- all 8 bits */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + + /* p10 p11 p12 p13 0 0 0 0 -- all 8 bits */ + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + + /* p20 p21 p22 p23 0 0 0 0 -- all 8 bits */ + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + + /* p30 p31 p32 p33 0 0 0 0 -- all 8 bits */ + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + pred_r2 = _mm_cvtepu8_epi16(pred_r2); + pred_r3 = _mm_cvtepu8_epi16(pred_r3); + + pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); + pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); + + /* Accumulating res */ + res_pred_r0 = _mm_loadl_epi64((__m128i *) &pi2_res_pred_ptr[0]); + res_pred_r1 = _mm_loadl_epi64((__m128i *) &pi2_res_pred_ptr[i4_res_pred_stride]); + res_pred_r2 = _mm_loadl_epi64((__m128i *) &pi2_res_pred_ptr[2 * i4_res_pred_stride]); + res_pred_r3 = 
_mm_loadl_epi64((__m128i *) &pi2_res_pred_ptr[3 * i4_res_pred_stride]); + + res_pred_r0 = _mm_unpacklo_epi64(res_pred_r0, res_pred_r1); + res_pred_r1 = _mm_unpacklo_epi64(res_pred_r2, res_pred_r3); + + temp0 = _mm_add_epi16(value_add, res_pred_r0); + temp1 = _mm_add_epi16(value_add, res_pred_r1); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp0 = _mm_max_epi16(temp0, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp0 = _mm_min_epi16(temp0, pos_255_8x16b); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp1 = _mm_max_epi16(temp1, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp1 = _mm_min_epi16(temp1, pos_255_8x16b); + + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[0]), temp0); + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride]), temp1); + + temp4 = _mm_add_epi16(temp0, pred_r0); + temp5 = _mm_add_epi16(temp1, pred_r2); + + temp0 = _mm_srli_si128(temp0, 8); + temp1 = _mm_srli_si128(temp1, 8); + + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[i4_res_stride]), temp0); + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride]), temp1); + + /*------------------------------------------------------------------*/ + /* Clipping the results to 8 bits */ + sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); + temp4 = _mm_and_si128(temp4, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); + temp5 = _mm_and_si128(temp5, sign_reg); + + temp4 = _mm_packus_epi16(temp4, temp5); + temp5 = _mm_srli_si128(temp4, 4); + temp6 = _mm_srli_si128(temp5, 4); + temp7 = _mm_srli_si128(temp6, 4); + + *pu4_out = _mm_cvtsi128_si32(temp4); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp5); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp6); + pu1_out += i4_out_stride; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = 
_mm_cvtsi128_si32(temp7); +} + +void isvc_iquant_itrans_recon_chroma_4x4_dc_sse42( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + /* DC value won't be dequantized for chroma + inverse transform */ + WORD16 q0 = pi2_dc_src[0]; + WORD16 i_macro = ((q0 + 32) >> 6); + + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i chroma_mask = _mm_set1_epi16(0xFF); + __m128i value_add = _mm_set1_epi16(i_macro); + __m128i out_r0, out_r1, out_r2, out_r3; + + ASSERT(0 == u1_res_accumulate); + + UNUSED(pi2_src); + UNUSED(pu2_iscal_mat); + UNUSED(pu2_weigh_mat); + UNUSED(u4_qp_div_6); + UNUSED(pi2_tmp); + UNUSED(ps_res_pred); + UNUSED(ps_res); + UNUSED(i4_iq_start_idx); + UNUSED(u1_res_accumulate); + + /* Load pred buffer */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + /* Mask alternate pred values from the interleaved pred buf */ + pred_r0 = _mm_and_si128(pred_r0, chroma_mask); + pred_r1 = _mm_and_si128(pred_r1, chroma_mask); + pred_r2 = _mm_and_si128(pred_r2, 
chroma_mask); + pred_r3 = _mm_and_si128(pred_r3, chroma_mask); + + /* Pack the first four 16 bit values of 2 regs into a single reg*/ + pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); + pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); + + /* Compute out pixel by adding res to pred */ + pred_r0 = _mm_add_epi16(value_add, pred_r0); + pred_r2 = _mm_add_epi16(value_add, pred_r2); + /*------------------------------------------------------------------*/ + /* Clipping the results to 8 bits */ + pred_r0 = _mm_packus_epi16(pred_r0, pred_r2); + pred_r1 = _mm_srli_si128(pred_r0, 4); + pred_r2 = _mm_srli_si128(pred_r1, 4); + pred_r3 = _mm_srli_si128(pred_r2, 4); + + /* p00 p01 p02 p03 -- all 16 bits */ + pred_r0 = _mm_unpacklo_epi8(pred_r0, zero_8x16b); + /* p10 p11 p12 p13 -- all 16 bits */ + pred_r1 = _mm_unpacklo_epi8(pred_r1, zero_8x16b); + /* p20 p21 p22 p23 -- all 16 bits */ + pred_r2 = _mm_unpacklo_epi8(pred_r2, zero_8x16b); + /* p30 p31 p32 p33 -- all 16 bits */ + pred_r3 = _mm_unpacklo_epi8(pred_r3, zero_8x16b); + + /* Load interleaved out buffer */ + out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0])); + out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_out_stride])); + out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride])); + out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride])); + + /* Mask the interleaved out buf in order to save the U/V out pixel computed in + this function call without thrashing the U/V out pixel that was saved + during an earlier function call */ + chroma_mask = _mm_set1_epi16(0xFF00); + + out_r0 = _mm_and_si128(out_r0, chroma_mask); + out_r1 = _mm_and_si128(out_r1, chroma_mask); + out_r2 = _mm_and_si128(out_r2, chroma_mask); + out_r3 = _mm_and_si128(out_r3, chroma_mask); + + /* Save the out pixels in alternate locations */ + out_r0 = _mm_add_epi8(out_r0, pred_r0); + out_r1 = _mm_add_epi8(out_r1, pred_r1); + out_r2 = _mm_add_epi8(out_r2, pred_r2); + out_r3 = _mm_add_epi8(out_r3, pred_r3); + + _mm_storel_epi64((__m128i 
*) (&pu1_out[0]), out_r0); + _mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), out_r1); + _mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2); + _mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3); +} + +void isvc_iquant_itrans_recon_res_chroma_4x4_dc_sse42( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + WORD16 *pi2_res_ptr = pi2_res; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + /* DC value won't be dequantized for chroma + inverse transform */ + WORD16 q0 = pi2_dc_src[0]; + WORD16 i_macro = ((q0 + 32) >> 6); + + __m128i pred_r0, pred_r1, pred_r2, pred_r3, sign_reg; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i chroma_mask = _mm_set1_epi16(0xFF); + __m128i value_add = _mm_set1_epi16(isvc_get_residue(i_macro, 0, 0)); + __m128i out_r0, out_r1, out_r2, out_r3; + + ASSERT(0 == u1_res_accumulate); + + UNUSED(pi2_src); + UNUSED(pu2_iscal_mat); + UNUSED(pu2_weigh_mat); + UNUSED(u4_qp_div_6); + UNUSED(pi2_tmp); + UNUSED(ps_res_pred); + UNUSED(i4_iq_start_idx); + UNUSED(u1_res_accumulate); + + /* Load pred buffer */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); 
+ + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + /* Mask alternate pred values from the interleaved pred buf */ + pred_r0 = _mm_and_si128(pred_r0, chroma_mask); + pred_r1 = _mm_and_si128(pred_r1, chroma_mask); + pred_r2 = _mm_and_si128(pred_r2, chroma_mask); + pred_r3 = _mm_and_si128(pred_r3, chroma_mask); + + /* Pack the first four 16 bit values of 2 regs into a single reg*/ + pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); + pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); + + /* Compute out pixel by adding res to pred */ + pred_r0 = _mm_add_epi16(value_add, pred_r0); + pred_r2 = _mm_add_epi16(value_add, pred_r2); + + /* Convert res from 16 bits to 32 bits */ + value_add = _mm_cvtepu16_epi32(value_add); + + out_r0 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[0 * i4_res_stride])); + out_r1 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[1 * i4_res_stride])); + out_r2 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride])); + out_r3 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride])); + + /* Mask the loaded res in order to save the U/V res data computed in + this function call without thrashing the U/V res data that was saved + during an earlier function call */ + chroma_mask = _mm_set1_epi32(0xffff0000); + out_r0 = _mm_and_si128(out_r0, chroma_mask); + out_r1 = _mm_and_si128(out_r1, chroma_mask); + out_r2 = _mm_and_si128(out_r2, chroma_mask); + out_r3 = _mm_and_si128(out_r3, chroma_mask); + + /* Save the res in alternate locations */ + out_r0 = _mm_add_epi16(out_r0, value_add); + out_r1 = _mm_add_epi16(out_r1, value_add); + out_r2 = _mm_add_epi16(out_r2, value_add); + out_r3 = _mm_add_epi16(out_r3, value_add); + + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[0 * i4_res_stride]), out_r0); + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[1 * i4_res_stride]), out_r1); + _mm_storeu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride]), out_r2); + 
_mm_storeu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride]), out_r3); + /*------------------------------------------------------------------*/ + /* Clipping the results to 8 bits */ + sign_reg = _mm_cmpgt_epi16(pred_r0, zero_8x16b); + pred_r0 = _mm_and_si128(pred_r0, sign_reg); + sign_reg = _mm_cmpgt_epi16(pred_r2, zero_8x16b); + pred_r2 = _mm_and_si128(pred_r2, sign_reg); + + pred_r0 = _mm_packus_epi16(pred_r0, pred_r2); + pred_r1 = _mm_srli_si128(pred_r0, 4); + pred_r2 = _mm_srli_si128(pred_r1, 4); + pred_r3 = _mm_srli_si128(pred_r2, 4); + + /* p00 p01 p02 p03 -- all 16 bits */ + pred_r0 = _mm_unpacklo_epi8(pred_r0, zero_8x16b); + /* p10 p11 p12 p13 -- all 16 bits */ + pred_r1 = _mm_unpacklo_epi8(pred_r1, zero_8x16b); + /* p20 p21 p22 p23 -- all 16 bits */ + pred_r2 = _mm_unpacklo_epi8(pred_r2, zero_8x16b); + /* p30 p31 p32 p33 -- all 16 bits */ + pred_r3 = _mm_unpacklo_epi8(pred_r3, zero_8x16b); + + /* Load interleaved out buffer */ + out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0])); + out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_out_stride])); + out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride])); + out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride])); + + /* Mask the interleaved out buf in order to save the U/V out pixel computed in + this function call without thrashing the U/V out pixel that was saved + during an earlier function call */ + chroma_mask = _mm_set1_epi16(0xFF00); + + out_r0 = _mm_and_si128(out_r0, chroma_mask); + out_r1 = _mm_and_si128(out_r1, chroma_mask); + out_r2 = _mm_and_si128(out_r2, chroma_mask); + out_r3 = _mm_and_si128(out_r3, chroma_mask); + + /* Save the out pixels in alternate locations */ + out_r0 = _mm_add_epi8(out_r0, pred_r0); + out_r1 = _mm_add_epi8(out_r1, pred_r1); + out_r2 = _mm_add_epi8(out_r2, pred_r2); + out_r3 = _mm_add_epi8(out_r3, pred_r3); + + _mm_storel_epi64((__m128i *) (&pu1_out[0]), out_r0); + _mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), out_r1); + 
_mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2); + _mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3); +} + +void isvc_iquant_itrans_recon_res_chroma_4x4_dc_with_res_acc_sse42( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred, + buffer_container_t *ps_res, buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src, + WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = (WORD16 *) ps_src->pv_data; + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + /* DC value won't be dequantized for chroma + inverse transform */ + WORD16 q0 = pi2_dc_src[0]; + WORD16 i_macro = ((q0 + 32) >> 6); + + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i chroma_mask = _mm_set1_epi16(0xFF); + __m128i reg_chroma = _mm_set_epi16(0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF); + __m128i value_add = _mm_set1_epi16(i_macro); + __m128i out_r0, out_r1, out_r2, out_r3; + __m128i res_r0, res_r1, res_r2, res_r3; + __m128i res_pred_r0, res_pred_r1, res_pred_r2, res_pred_r3; + __m128i temp0, temp1; + __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX)); + __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX)); + + ASSERT(1 == u1_res_accumulate); + + UNUSED(pi2_src); + 
UNUSED(pu2_iscal_mat); + UNUSED(pu2_weigh_mat); + UNUSED(u4_qp_div_6); + UNUSED(pi2_tmp); + UNUSED(i4_iq_start_idx); + UNUSED(u1_res_accumulate); + + /* Load pred buffer */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + /* Mask alternate pred values from the interleaved pred buf */ + pred_r0 = _mm_and_si128(pred_r0, chroma_mask); + pred_r1 = _mm_and_si128(pred_r1, chroma_mask); + pred_r2 = _mm_and_si128(pred_r2, chroma_mask); + pred_r3 = _mm_and_si128(pred_r3, chroma_mask); + + /* Pack the first four 16 bit values of 2 regs into a single reg*/ + pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); + pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); + + /* Accumulating res */ + + /* load res pred buffer */ + res_pred_r0 = _mm_loadu_si128((__m128i *) &pi2_res_pred[0 * i4_res_pred_stride]); + res_pred_r1 = _mm_loadu_si128((__m128i *) &pi2_res_pred[1 * i4_res_pred_stride]); + res_pred_r2 = _mm_loadu_si128((__m128i *) &pi2_res_pred[2 * i4_res_pred_stride]); + res_pred_r3 = _mm_loadu_si128((__m128i *) &pi2_res_pred[3 * i4_res_pred_stride]); + + /* Mask res pred and retain alternate values */ + res_pred_r0 = _mm_and_si128(res_pred_r0, reg_chroma); + res_pred_r1 = _mm_and_si128(res_pred_r1, reg_chroma); + res_pred_r2 = _mm_and_si128(res_pred_r2, reg_chroma); + res_pred_r3 = _mm_and_si128(res_pred_r3, reg_chroma); + + /* Convert to 32 bits */ + res_r0 = _mm_cvtepu16_epi32(value_add); + res_r2 = _mm_cvtepu16_epi32(value_add); + res_r1 = _mm_cvtepu16_epi32(value_add); + res_r3 = _mm_cvtepu16_epi32(value_add); + + /* Add res pred to the res obtained from inv transform */ + res_r0 = _mm_add_epi16(res_pred_r0, res_r0); + res_r1 = _mm_add_epi16(res_pred_r1, res_r1); + res_r2 = _mm_add_epi16(res_pred_r2, res_r2); + res_r3 = _mm_add_epi16(res_pred_r3, 
res_r3); + + /* Convert 32 bit res of the format [a0 0 a1 0 a2 0 a3 0] to + 16 bits of the format [a0 a1 a2 a3] using hadd [ao + 0, + a1 + 0, a2 + 0, a3 + 0] To be optimized */ + temp0 = _mm_hadd_epi16(res_r0, res_r1); + temp1 = _mm_hadd_epi16(res_r2, res_r3); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp0 = _mm_max_epi16(temp0, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp0 = _mm_min_epi16(temp0, pos_255_8x16b); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + temp1 = _mm_max_epi16(temp1, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp1 = _mm_min_epi16(temp1, pos_255_8x16b); + + /* Compute out pixel by adding res to pred */ + pred_r0 = _mm_add_epi16(temp0, pred_r0); + pred_r2 = _mm_add_epi16(temp1, pred_r2); + + res_r0 = _mm_cvtepu16_epi32(temp0); + res_r2 = _mm_cvtepu16_epi32(temp1); + res_r1 = _mm_srli_si128(temp0, 8); + res_r3 = _mm_srli_si128(temp1, 8); + res_r1 = _mm_cvtepu16_epi32(res_r1); + res_r3 = _mm_cvtepu16_epi32(res_r3); + + /* Load res buffer */ + out_r0 = _mm_loadu_si128((__m128i *) (&pi2_res[0 * i4_res_stride])); + out_r1 = _mm_loadu_si128((__m128i *) (&pi2_res[1 * i4_res_stride])); + out_r2 = _mm_loadu_si128((__m128i *) (&pi2_res[2 * i4_res_stride])); + out_r3 = _mm_loadu_si128((__m128i *) (&pi2_res[3 * i4_res_stride])); + + /* Mask the loaded res in order to save the U/V res data computed in + this function call without thrashing the U/V res data that was saved + during an earlier function call */ + chroma_mask = _mm_set1_epi32(0xffff0000); + + out_r0 = _mm_and_si128(out_r0, chroma_mask); + out_r1 = _mm_and_si128(out_r1, chroma_mask); + out_r2 = _mm_and_si128(out_r2, chroma_mask); + out_r3 = _mm_and_si128(out_r3, chroma_mask); + + /* Save the res in alternate locations */ + out_r0 = _mm_add_epi16(out_r0, res_r0); + out_r1 = _mm_add_epi16(out_r1, res_r1); + out_r2 = _mm_add_epi16(out_r2, res_r2); 
+ out_r3 = _mm_add_epi16(out_r3, res_r3); + + _mm_storeu_si128((__m128i *) (&pi2_res[0 * i4_res_stride]), out_r0); + _mm_storeu_si128((__m128i *) (&pi2_res[1 * i4_res_stride]), out_r1); + _mm_storeu_si128((__m128i *) (&pi2_res[2 * i4_res_stride]), out_r2); + _mm_storeu_si128((__m128i *) (&pi2_res[3 * i4_res_stride]), out_r3); + /*------------------------------------------------------------------*/ + /* Clipping the results to 8 bits */ + pred_r0 = _mm_packus_epi16(pred_r0, pred_r2); + pred_r1 = _mm_srli_si128(pred_r0, 4); + pred_r2 = _mm_srli_si128(pred_r1, 4); + pred_r3 = _mm_srli_si128(pred_r2, 4); + + /* p00 p01 p02 p03 -- all 16 bits */ + pred_r0 = _mm_unpacklo_epi8(pred_r0, zero_8x16b); + /* p10 p11 p12 p13 -- all 16 bits */ + pred_r1 = _mm_unpacklo_epi8(pred_r1, zero_8x16b); + /* p20 p21 p22 p23 -- all 16 bits */ + pred_r2 = _mm_unpacklo_epi8(pred_r2, zero_8x16b); + /* p30 p31 p32 p33 -- all 16 bits */ + pred_r3 = _mm_unpacklo_epi8(pred_r3, zero_8x16b); + + /* Load interleaved out buffer */ + out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0])); + out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_out_stride])); + out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride])); + out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride])); + + /* Mask the interleaved out buf in order to save the U/V out pixel computed in + this function call without thrashing the U/V out pixel that was saved + during an earlier function call */ + chroma_mask = _mm_set1_epi16(0xFF00); + + out_r0 = _mm_and_si128(out_r0, chroma_mask); + out_r1 = _mm_and_si128(out_r1, chroma_mask); + out_r2 = _mm_and_si128(out_r2, chroma_mask); + out_r3 = _mm_and_si128(out_r3, chroma_mask); + + /* Save the out pixels in alternate locations */ + out_r0 = _mm_add_epi8(out_r0, pred_r0); + out_r1 = _mm_add_epi8(out_r1, pred_r1); + out_r2 = _mm_add_epi8(out_r2, pred_r2); + out_r3 = _mm_add_epi8(out_r3, pred_r3); + + _mm_storel_epi64((__m128i *) (&pu1_out[0]), out_r0); + 
_mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), out_r1); + _mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2); + _mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3); +} diff --git a/common/x86/svc/isvc_iquant_itrans_recon_ssse3.c b/common/x86/svc/isvc_iquant_itrans_recon_ssse3.c new file mode 100644 index 0000000..2cbdc94 --- /dev/null +++ b/common/x86/svc/isvc_iquant_itrans_recon_ssse3.c @@ -0,0 +1,1291 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +/** + ******************************************************************************* + * @file + * isvc_iquant_itrans_recon_ssse3.c + * + * @brief + * Contains function definitions for inverse quantization, inverse + * transform and reconstruction + * + * @author + * Mohit [100664] + * + * @par List of Functions: + * - isvc_iquant_itrans_recon_4x4_ssse3() + * - isvc_iquant_itrans_recon_8x8_ssse3() + * + * @remarks + * None + * + ******************************************************************************* + */ +#include + +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized residue and + * prediction buffer + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed. 
+ * This inverse transformed content is added to the prediction buffer to recon-
+ * struct the end output
+ *
+ * @param[in] ps_src
+ *  container of the quantized 4x4 block; the 16 WORD16 coefficients are read
+ *  contiguously from pv_data (the container stride is not used)
+ *
+ * @param[in] ps_pred
+ *  container of the 4x4 prediction block (UWORD8 samples, rows separated by
+ *  i4_data_stride)
+ *
+ * @param ps_res_pred
+ *  not used by this function yet (see remarks)
+ *
+ * @param ps_res
+ *  not used by this function yet (see remarks)
+ *
+ * @param[out] ps_rec
+ *  container of the reconstructed 4x4 block (UWORD8 samples, rows separated by
+ *  i4_data_stride)
+ *
+ * @param[in] ps_iq_it_res_rec_constants
+ *  supplies pu2_iscal_mat and pu2_weigh_mat (16 UWORD16 entries each, which
+ *  are multiplied element-wise to form the dequantization scale) and
+ *  u4_qp_div_6
+ *
+ * @param[in] pi2_tmp
+ *  not used by this function
+ *
+ * @param[in] pi2_dc_src
+ *  not used by this function
+ *
+ * @param[in] i4_iq_start_idx
+ *  when equal to 1, coefficient 0 is not dequantized; pi2_src[0] is fed to
+ *  the inverse transform unchanged
+ *
+ * @param[in] u1_res_accumulate
+ *  not used by this function yet (see remarks)
+ *
+ * @returns none
+ *
+ * @remarks
+ *  Residue accumulation is still to be implemented; the function begins with
+ *  ASSERT(0) to flag this.
+ *
+ *******************************************************************************
+ */
+void isvc_iquant_itrans_recon_4x4_ssse3(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                        buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
+                                        buffer_container_t *ps_rec,
+                                        iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+                                        WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx,
+                                        UWORD8 u1_res_accumulate)
+{
+    WORD16 *pi2_src = ps_src->pv_data;
+    WORD16 *pi2_res = ps_res->pv_data;
+    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
+    UWORD8 *pu1_pred = ps_pred->pv_data;
+    UWORD8 *pu1_out = ps_rec->pv_data;
+    WORD32 i4_src_stride = ps_src->i4_data_stride;
+    WORD32 i4_res_stride = ps_res->i4_data_stride;
+    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
+    __m128i src_r0_r1, src_r2_r3;
+    __m128i src_r0, src_r1, src_r2, src_r3;
+    __m128i scalemat_r0_r1, scalemat_r2_r3, predload_r;
+    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
+    __m128i dequant_r0_r1, dequant_r2_r3;
+    __m128i zero_8x16b = _mm_setzero_si128();  // all bits reset to zero
+    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
+    /* Rounding term for the right-shift branch of the dequantization */
+    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
+    __m128i value_32 = _mm_set1_epi32(32);
+
+    /* i4_iq_start_idx is deliberately absent from this list: it is used below */
+    UNUSED(pi2_tmp);
+    UNUSED(pi2_dc_src);
+    UNUSED(u1_res_accumulate);
+    UNUSED(i4_src_stride);
+    UNUSED(i4_res_stride);
+    UNUSED(i4_res_pred_stride);
+    UNUSED(pi2_res);
+    UNUSED(pi2_res_pred);
+
+    /* Implement residue accumulation */
+    ASSERT(0);
+
+    /*************************************************************/
+    /* Dequantization of coefficients. Will be replaced by SIMD  */
+    /* operations on platform                                    */
+    /*************************************************************/
+    // a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
+    src_r0_r1 = _mm_loadu_si128((const __m128i *) (pi2_src));
+    // a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
+    src_r2_r3 = _mm_loadu_si128((const __m128i *) (pi2_src + 8));
+    // b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
+    scalemat_r0_r1 = _mm_loadu_si128((const __m128i *) (pu2_iscal_mat));
+    // b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
+    scalemat_r2_r3 = _mm_loadu_si128((const __m128i *) (pu2_iscal_mat + 8));
+    // q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits
+    dequant_r0_r1 = _mm_loadu_si128((const __m128i *) (pu2_weigh_mat));
+    // q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
+    dequant_r2_r3 = _mm_loadu_si128((const __m128i *) (pu2_weigh_mat + 8));
+
+    // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
+    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1);
+    // b20*q20 b21*q21 b22*q22 b23*q23 b30*q30 b31*q31 b32*q32 b33*q33 -- 16 bit result
+    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3);
+
+    /* Interleave with zero so that every 32-bit lane holds one 16-bit value */
+    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b);  // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0
+    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b);  // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0
+    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b);  // b20*q20 0 b21*q21 0 b22*q22 0 b23*q23 0
+    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b);  // b30*q30 0 b31*q31 0 b32*q32 0 b33*q33 0
+
+    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b);  // a00 0 a01 0 a02 0 a03 0
+    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b);  // a10 0 a11 0 a12 0 a13 0
+    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b);  // a20 0 a21 0 a22 0 a23 0
+    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b);  // a30 0 a31 0 a32 0 a33 0
+
+    /* The odd 16-bit lanes are zero, so each madd lane is a single product */
+    // a00*b00*q00 a01*b01*q01 a02*b02*q02 a03*b03*q03 -- 32 bits long
+    temp4 = _mm_madd_epi16(src_r0, temp4);
+    temp5 = _mm_madd_epi16(src_r1, temp5);
+    temp6 = _mm_madd_epi16(src_r2, temp6);
+    temp7 = _mm_madd_epi16(src_r3, temp7);
+
+    if(u4_qp_div_6 >= 4)
+    {
+        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
+        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
+        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
+        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
+    }
+    else
+    {
+        temp4 = _mm_add_epi32(temp4, add_rshift);
+        temp5 = _mm_add_epi32(temp5, add_rshift);
+        temp6 = _mm_add_epi32(temp6, add_rshift);
+        temp7 = _mm_add_epi32(temp7, add_rshift);
+        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
+        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
+        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
+        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
+    }
+
+    if(i4_iq_start_idx == 1)
+    {
+        /* Overwrite 32-bit lane 0 with the sign-extended, unscaled pi2_src[0]: */
+        /* 16-bit lane 0 takes the value, 16-bit lane 1 takes its sign bits     */
+        resq_r0 = _mm_insert_epi16(resq_r0, (WORD32) pi2_src[0], 0);
+        if(pi2_src[0] >= 0)
+        {
+            resq_r0 = _mm_insert_epi16(resq_r0, 0, 1);
+        }
+        else
+        {
+            resq_r0 = _mm_insert_epi16(resq_r0, -1, 1);
+        }
+    }
+
+    /* Perform Inverse transform */
+    /*-------------------------------------------------------------*/
+    /* IDCT [ Horizontal transformation ]                          */
+    /*-------------------------------------------------------------*/
+    // Matrix transpose
+    /*
+     *  a0 a1 a2 a3
+     *  b0 b1 b2 b3
+     *  c0 c1 c2 c3
+     *  d0 d1 d2 d3
+     */
+    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);  // a0 b0 a1 b1
+    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);  // c0 d0 c1 d1
+    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);  // a2 b2 a3 b3
+    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);  // c2 d2 c3 d3
+    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);    // a0 b0 c0 d0
+    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);    // a1 b1 c1 d1
+    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);    // a2 b2 c2 d2
+    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);    // a3 b3 c3 d3
+    // Transform starts -- horizontal transform
+    /*------------------------------------------------------------------*/
+    /* z0 = w0 + w2                                             */
+    temp0 = _mm_add_epi32(resq_r0, resq_r2);
+    /* z1 = w0 - w2                                             */
+    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
+    /* z2 = (w1 >> 1) - w3                                      */
+    temp2 = _mm_srai_epi32(resq_r1, 1);     // (w1>>1)
+    temp2 = _mm_sub_epi32(temp2, resq_r3);  // (w1>>1) - w3
+    /* z3 = w1 + (w3 >> 1)                                      */
+    temp3 = _mm_srai_epi32(resq_r3, 1);     // (w3>>1)
+    temp3 = _mm_add_epi32(temp3, resq_r1);  // (w3>>1) + w1
+    /*----------------------------------------------------------*/
+    /* x0 = z0 + z3                                             */
+    resq_r0 = _mm_add_epi32(temp0, temp3);
+    /* x1 = z1 + z2                                             */
+    resq_r1 = _mm_add_epi32(temp1, temp2);
+    /* x2 = z1 - z2                                             */
+    resq_r2 = _mm_sub_epi32(temp1, temp2);
+    /* x3 = z0 - z3                                             */
+    resq_r3 = _mm_sub_epi32(temp0, temp3);
+    // Matrix transpose
+    /*
+     *  a0 b0 c0 d0
+     *  a1 b1 c1 d1
+     *  a2 b2 c2 d2
+     *  a3 b3 c3 d3
+     */
+    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);  // a0 a1 b0 b1
+    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);  // a2 a3 b2 b3
+    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);  // c0 c1 d0 d1
+    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);  // c2 c3 d2 d3
+    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);    // a0 a1 a2 a3
+    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);    // b0 b1 b2 b3
+    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);    // c0 c1 c2 c3
+    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);    // d0 d1 d2 d3
+    // Transform ends -- horizontal transform
+
+    // Load pred buffer
+    /* Each _mm_loadl_epi64 fetches 8 bytes of a prediction row; only the */
+    /* first four samples survive the two unpacklo steps below            */
+    predload_r = _mm_loadl_epi64((const __m128i *) (&pu1_pred[0]));
+    pred_r0 = _mm_unpacklo_epi8(predload_r, zero_8x16b);  // p00 p01 p02 p03 ... -- all 16 bits
+
+    predload_r = _mm_loadl_epi64((const __m128i *) (&pu1_pred[i4_pred_stride]));
+    pred_r1 = _mm_unpacklo_epi8(predload_r, zero_8x16b);  // p10 p11 p12 p13 ... -- all 16 bits
+
+    predload_r = _mm_loadl_epi64((const __m128i *) (&pu1_pred[2 * i4_pred_stride]));
+    pred_r2 = _mm_unpacklo_epi8(predload_r, zero_8x16b);  // p20 p21 p22 p23 ... -- all 16 bits
+
+    predload_r = _mm_loadl_epi64((const __m128i *) (&pu1_pred[3 * i4_pred_stride]));
+    pred_r3 = _mm_unpacklo_epi8(predload_r, zero_8x16b);  // p30 p31 p32 p33 ... -- all 16 bits
+
+    pred_r0 = _mm_unpacklo_epi16(pred_r0, zero_8x16b);  // p00 p01 p02 p03 -- 32 bits zero extended
+    pred_r1 = _mm_unpacklo_epi16(pred_r1, zero_8x16b);  // p10 p11 p12 p13 -- 32 bits zero extended
+    pred_r2 = _mm_unpacklo_epi16(pred_r2, zero_8x16b);  // p20 p21 p22 p23 -- 32 bits zero extended
+    pred_r3 = _mm_unpacklo_epi16(pred_r3, zero_8x16b);  // p30 p31 p32 p33 -- 32 bits zero extended
+
+    /*--------------------------------------------------------------*/
+    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
+    /*                                                              */
+    /* Add the prediction and store it back to same buffer          */
+    /*--------------------------------------------------------------*/
+    /* z0j = y0j + y2j                                                        */
+    temp0 = _mm_add_epi32(resq_r0, resq_r2);
+    /* z1j = y0j - y2j                                                        */
+    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
+    /* z2j = (y1j>>1) - y3j                                                   */
+    temp2 = _mm_srai_epi32(resq_r1, 1);  // (y1j>>1)
+    temp2 = _mm_sub_epi32(temp2, resq_r3);
+    /* z3j = y1j + (y3j>>1)                                                   */
+    temp3 = _mm_srai_epi32(resq_r3, 1);  // (y3j>>1)
+    temp3 = _mm_add_epi32(temp3, resq_r1);
+
+    /* x0j = z0j + z3j                                                        */
+    temp4 = _mm_add_epi32(temp0, temp3);
+    temp4 = _mm_add_epi32(temp4, value_32);
+    temp4 = _mm_srai_epi32(temp4, 6);
+    temp4 = _mm_add_epi32(temp4, pred_r0);
+    /* x1j = z1j + z2j                                                        */
+    temp5 = _mm_add_epi32(temp1, temp2);
+    temp5 = _mm_add_epi32(temp5, value_32);
+    temp5 = _mm_srai_epi32(temp5, 6);
+    temp5 = _mm_add_epi32(temp5, pred_r1);
+    /* x2j = z1j - z2j                                                        */
+    temp6 = _mm_sub_epi32(temp1, temp2);
+    temp6 = _mm_add_epi32(temp6, value_32);
+    temp6 = _mm_srai_epi32(temp6, 6);
+    temp6 = _mm_add_epi32(temp6, pred_r2);
+    /* x3j = z0j - z3j                                                        */
+    temp7 = _mm_sub_epi32(temp0, temp3);
+    temp7 = _mm_add_epi32(temp7, value_32);
+    temp7 = _mm_srai_epi32(temp7, 6);
+    temp7 = _mm_add_epi32(temp7, pred_r3);
+
+    // 32-bit to 16-bit conversion (signed saturation)
+    temp0 = _mm_packs_epi32(temp4, temp5);
+    temp1 = _mm_packs_epi32(temp6, temp7);
+    /*------------------------------------------------------------------*/
+    // Clipping the results to 8 bits
+    /* _mm_packus_epi16 saturates every signed 16-bit lane to [0, 255], so */
+    /* negative sums become 0 without a separate compare-and-mask step     */
+    resq_r0 = _mm_packus_epi16(temp0, temp1);  // row0 row1 row2 row3, 4 bytes each
+    resq_r1 = _mm_srli_si128(resq_r0, 4);
+    resq_r2 = _mm_srli_si128(resq_r1, 4);
+    resq_r3 = _mm_srli_si128(resq_r2, 4);
+
+    /* NOTE(review): every row is written as one 32-bit store through a     */
+    /* UWORD32 pointer derived from the UWORD8 output pointer; this relies   */
+    /* on the build tolerating type-punned, possibly unaligned stores.       */
+    *pu4_out = _mm_cvtsi128_si32(resq_r0);
+    pu1_out += i4_out_stride;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r1);
+    pu1_out += i4_out_stride;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r2);
+    pu1_out += i4_out_stride;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r3);
+}
+
+/**
******************************************************************************* + * + * @brief + * This function performs inverse quant and Inverse transform type Ci4 for 8x8 + *block + * + * @par Description: + * Performs inverse transform Ci8 and adds the residue to get the + * reconstructed block + * + * @param[in] pi2_src + * Input 8x8coefficients + * + * @param[in] pu1_pred + * Prediction 8x8 block + * + * @param[out] pu1_recon + * Output 8x8 block + * + * @param[in] q_div + * QP/6 + * + * @param[in] q_rem + * QP%6 + * + * @param[in] q_lev + * Quantizer level + * + * @param[in] u4_src_stride + * Input stride + * + * @param[in] u4_pred_stride, + * Prediction stride + * + * @param[in] u4_out_stride + * Output Stride + * + * @param[in] pi4_tmp + * temporary buffer of size 1*64 + * the tmp for each block + * + * @param[in] pu4_iquant_mat + * Pointer to the inverse quantization matrix + * + * @returns Void + * + * @remarks + * None + * + ******************************************************************************* + */ + +void isvc_iquant_itrans_recon_8x8_ssse3(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res_pred, buffer_container_t *ps_res, + buffer_container_t *ps_rec, + iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, + WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, + UWORD8 u1_res_accumulate) +{ + WORD16 *pi2_src = ps_src->pv_data; + WORD16 *pi2_res = ps_res->pv_data; + WORD16 *pi2_res_pred = ps_res_pred->pv_data; + UWORD8 *pu1_pred = ps_pred->pv_data; + UWORD8 *pu1_out = ps_rec->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_res_stride = ps_res->i4_data_stride; + WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_rec->i4_data_stride; + const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat; + const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat; + UWORD32 
u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6; + __m128i src_r0; + __m128i scalemat_r0; + __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + // __m128i one_8x16b = _mm_set1_epi8(255); // all bits set to 1 + // __m128i one_zero_mask = _mm_unpacklo_epi16(one_8x16b, zero_8x16b); // 1 0 1 + // 0 1 0 1 0 --- 16 bits size + __m128i value_32 = _mm_set1_epi32(32); + __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 6) ? (1 << (5 - u4_qp_div_6)) : 0); + __m128i dequant_r0; + __m128i predload_r; + __m128i pred_r0_1, pred_r1_1, pred_r2_1, pred_r3_1, pred_r4_1, pred_r5_1, pred_r6_1, pred_r7_1; + __m128i sign_reg; + __m128i src_r0_1, src_r0_2; + __m128i scalemat_r0_1, scalemat_r0_2; + __m128i temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; + __m128i temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18, temp19, temp20; + // To store dequantization results + __m128i resq_r0_1, resq_r0_2, resq_r1_1, resq_r1_2, resq_r2_1, resq_r2_2, resq_r3_1, resq_r3_2, + resq_r4_1, resq_r4_2, resq_r5_1, resq_r5_2, resq_r6_1, resq_r6_2, resq_r7_1, resq_r7_2; + + UNUSED(pi2_tmp); + UNUSED(i4_iq_start_idx); + UNUSED(pi2_dc_src); + UNUSED(u1_res_accumulate); + UNUSED(i4_src_stride); + UNUSED(i4_res_stride); + UNUSED(i4_res_pred_stride); + UNUSED(pi2_res); + UNUSED(pi2_res_pred); + UNUSED(i4_iq_start_idx); + + /* Implement residue accumulation */ + ASSERT(0); + + /*************************************************************/ + /* Dequantization of coefficients. Will be replaced by SIMD */ + /* operations on platform. 
Note : DC coeff is not scaled */ + /*************************************************************/ + + // Row 0 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src)); // a00 a01 a02 a03 a04 a05 a06 a07 -- the + // source matrix 0th row + scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); // b00 b01 b02 b03 b04 b05 b06 b07 + // -- the scaling matrix 0th row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[0])); // q0 q1 q2 q3 q4 q5 q6 + // q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, + dequant_r0); // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 + // b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = + _mm_unpacklo_epi16(temp10, + zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = + _mm_unpackhi_epi16(temp10, + zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + + temp5 = _mm_madd_epi16(src_r0_1, + scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 + // a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, + scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 + // a07*b07*q7 -- 32 bits long + + if(u4_qp_div_6 >= 6) + { + resq_r0_1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 6); + resq_r0_2 = _mm_slli_epi32(temp7, u4_qp_div_6 - 6); + } + else + { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r0_1 = _mm_srai_epi32(temp5, 6 - u4_qp_div_6); + resq_r0_2 = _mm_srai_epi32(temp7, 6 - u4_qp_div_6); + } + resq_r0_1 = + _mm_packs_epi32(resq_r0_1, + resq_r0_2); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 + // a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 1 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- + // the source matrix 1st row + scalemat_r0 = + 
_mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); // b00 b01 b02 b03 b04 b05 b06 b07 b08 + // -- the scaling matrix 1st row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[8])); // q0 q1 q2 q3 q4 q5 q6 + // q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, + dequant_r0); // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 + // b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = + _mm_unpacklo_epi16(temp10, + zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = + _mm_unpackhi_epi16(temp10, + zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, + scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 + // a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, + scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 + // a07*b07*q7 -- 32 bits long + if(u4_qp_div_6 >= 6) + { + resq_r1_1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 6); + resq_r1_2 = _mm_slli_epi32(temp7, u4_qp_div_6 - 6); + } + else + { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r1_1 = _mm_srai_epi32(temp5, 6 - u4_qp_div_6); + resq_r1_2 = _mm_srai_epi32(temp7, 6 - u4_qp_div_6); + } + resq_r1_1 = + _mm_packs_epi32(resq_r1_1, + resq_r1_2); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 + // a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 2 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 16)); // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- + // the source matrix 2nd row + scalemat_r0 = + _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 16)); // b00 b01 b02 b03 b04 b05 b06 b07 b08 + // -- the scaling matrix 2nd row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[16])); // q0 q1 q2 q3 q4 q5 + // q6 q7 -- all 16 bits + src_r0_1 = 
_mm_unpacklo_epi16(src_r0, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, + dequant_r0); // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 + // b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = + _mm_unpacklo_epi16(temp10, + zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = + _mm_unpackhi_epi16(temp10, + zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, + scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 + // a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, + scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 + // a07*b07*q7 -- 32 bits long + if(u4_qp_div_6 >= 6) + { + resq_r2_1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 6); + resq_r2_2 = _mm_slli_epi32(temp7, u4_qp_div_6 - 6); + } + else + { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r2_1 = _mm_srai_epi32(temp5, 6 - u4_qp_div_6); + resq_r2_2 = _mm_srai_epi32(temp7, 6 - u4_qp_div_6); + } + resq_r2_1 = + _mm_packs_epi32(resq_r2_1, + resq_r2_2); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 + // a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 3 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 24)); // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- + // the source matrix 3rd row + scalemat_r0 = + _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 24)); // b00 b01 b02 b03 b04 b05 b06 b07 b08 + // -- the scaling matrix 3rd row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[24])); // q0 q1 q2 q3 q4 q5 + // q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, + dequant_r0); // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 + // 
b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = + _mm_unpacklo_epi16(temp10, + zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = + _mm_unpackhi_epi16(temp10, + zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, + scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 + // a03*b03*q3 - 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, + scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 + // a07*b07*q7 -- 32 bits long + if(u4_qp_div_6 >= 6) + { + resq_r3_1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 6); + resq_r3_2 = _mm_slli_epi32(temp7, u4_qp_div_6 - 6); + } + else + { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r3_1 = _mm_srai_epi32(temp5, 6 - u4_qp_div_6); + resq_r3_2 = _mm_srai_epi32(temp7, 6 - u4_qp_div_6); + } + resq_r3_1 = + _mm_packs_epi32(resq_r3_1, + resq_r3_2); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 + // a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 4 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 32)); // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- + // the source matrix 4th row + scalemat_r0 = + _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 32)); // b00 b01 b02 b03 b04 b05 b06 b07 b08 + // -- the scaling matrix 4th row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[32])); // q0 q1 q2 q3 q4 q5 + // q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, + dequant_r0); // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 + // b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = + _mm_unpacklo_epi16(temp10, + zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = + _mm_unpackhi_epi16(temp10, + zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + 
temp5 = _mm_madd_epi16(src_r0_1, + scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 + // a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, + scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 + // a07*b07*q7 -- 32 bits long + if(u4_qp_div_6 >= 6) + { + resq_r4_1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 6); + resq_r4_2 = _mm_slli_epi32(temp7, u4_qp_div_6 - 6); + } + else + { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r4_1 = _mm_srai_epi32(temp5, 6 - u4_qp_div_6); + resq_r4_2 = _mm_srai_epi32(temp7, 6 - u4_qp_div_6); + } + resq_r4_1 = + _mm_packs_epi32(resq_r4_1, + resq_r4_2); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 + // a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 5 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 40)); // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- + // the source matrix 5th row + scalemat_r0 = + _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 40)); // b00 b01 b02 b03 b04 b05 b06 b07 b08 + // -- the scaling matrix 5th row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[40])); // q0 q1 q2 q3 q4 q5 + // q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, + dequant_r0); // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 + // b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = + _mm_unpacklo_epi16(temp10, + zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = + _mm_unpackhi_epi16(temp10, + zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, + scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 + // a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, + scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 + // a07*b07*q7 -- 32 bits long + if(u4_qp_div_6 >= 6) + { + 
resq_r5_1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 6); + resq_r5_2 = _mm_slli_epi32(temp7, u4_qp_div_6 - 6); + // resq_r5_1 = _mm_and_si128(resq_r5_1,one_zero_mask); + // resq_r5_2 = _mm_and_si128(resq_r5_2,one_zero_mask); + } + else + { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r5_1 = _mm_srai_epi32(temp5, 6 - u4_qp_div_6); + resq_r5_2 = _mm_srai_epi32(temp7, 6 - u4_qp_div_6); + } + resq_r5_1 = + _mm_packs_epi32(resq_r5_1, + resq_r5_2); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 + // a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 6 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 48)); // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- + // the source matrix 6th row + scalemat_r0 = + _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 48)); // b00 b01 b02 b03 b04 b05 b06 b07 b08 + // -- the scaling matrix 6th row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[48])); // q0 q1 q2 q3 q4 q5 + // q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, + dequant_r0); // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 + // b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = + _mm_unpacklo_epi16(temp10, + zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = + _mm_unpackhi_epi16(temp10, + zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, + scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 + // a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, + scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 + // a07*b07*q7 -- 32 bits long + if(u4_qp_div_6 >= 6) + { + resq_r6_1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 6); + resq_r6_2 = _mm_slli_epi32(temp7, u4_qp_div_6 - 6); + // resq_r6_1 = 
_mm_and_si128(resq_r6_1,one_zero_mask); + // resq_r6_2 = _mm_and_si128(resq_r6_2,one_zero_mask); + } + else + { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r6_1 = _mm_srai_epi32(temp5, 6 - u4_qp_div_6); + resq_r6_2 = _mm_srai_epi32(temp7, 6 - u4_qp_div_6); + // resq_r6_1 = _mm_and_si128(resq_r6_1,one_zero_mask); + // resq_r6_2 = _mm_and_si128(resq_r6_2,one_zero_mask); + } + resq_r6_1 = + _mm_packs_epi32(resq_r6_1, + resq_r6_2); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 + // a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 7 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 56)); // a00 a01 a02 a03 a04 a05 a06 a07 a08 -- + // the source matrix 7th row + scalemat_r0 = + _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 56)); // b00 b01 b02 b03 b04 b05 b06 b07 b08 + // -- the scaling matrix 7th row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[56])); // q0 q1 q2 q3 q4 q5 + // q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, + dequant_r0); // b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 + // b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = + _mm_unpacklo_epi16(temp10, + zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = + _mm_unpackhi_epi16(temp10, + zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, + scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 + // a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, + scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 + // a07*b07*q7 -- 32 bits long + if(u4_qp_div_6 >= 6) + { + resq_r7_1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 6); + resq_r7_2 = _mm_slli_epi32(temp7, u4_qp_div_6 - 6); + } + else + { + temp5 = _mm_add_epi32(temp5, add_rshift); + 
temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r7_1 = _mm_srai_epi32(temp5, 6 - u4_qp_div_6); + resq_r7_2 = _mm_srai_epi32(temp7, 6 - u4_qp_div_6); + } + resq_r7_1 = + _mm_packs_epi32(resq_r7_1, + resq_r7_2); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 + // a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + /* Perform Inverse transform */ + /*--------------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*--------------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 a4 a5 a6 a7 + * b0 b1 b2 b3 b4 b5 b6 b7 + * c0 c1 c2 c3 c4 c5 c6 c7 + * d0 d1 d2 d3 d4 d5 d6 d7 + */ + temp1 = _mm_unpacklo_epi16(resq_r0_1, resq_r1_1); // a0 b0 a1 b1 a2 b2 a3 b3 + temp3 = _mm_unpacklo_epi16(resq_r2_1, resq_r3_1); // c0 d0 c1 d1 c2 d2 c3 d3 + temp2 = _mm_unpackhi_epi16(resq_r0_1, resq_r1_1); // a4 b4 a5 b5 a6 b6 a7 b7 + temp4 = _mm_unpackhi_epi16(resq_r2_1, resq_r3_1); // c4 d4 c5 d5 c6 d6 c7 d7 + resq_r0_1 = _mm_unpacklo_epi32(temp1, temp3); // a0 b0 c0 d0 a1 b1 c1 d1 + resq_r1_1 = _mm_unpackhi_epi32(temp1, temp3); // a2 b2 c2 d2 a3 b3 c3 d3 + resq_r2_1 = _mm_unpacklo_epi32(temp2, temp4); // a4 b4 c4 d4 a5 b5 c5 d5 + resq_r3_1 = _mm_unpackhi_epi32(temp2, temp4); // a6 b6 c6 d6 a7 b7 c7 d7 + /* + * e0 e1 e2 e3 e4 e5 e6 e7 + * f0 f1 f2 f3 f4 f5 f6 f7 + * g0 g1 g2 g3 g4 g5 g6 g7 + * h0 h1 h2 h3 h4 h5 h6 h7 + */ + temp1 = _mm_unpacklo_epi16(resq_r4_1, resq_r5_1); // e0 f0 e1 f1 e2 f2 e2 f3 + temp3 = _mm_unpacklo_epi16(resq_r6_1, resq_r7_1); // g0 h0 g1 h1 g2 h2 g3 h3 + temp2 = _mm_unpackhi_epi16(resq_r4_1, resq_r5_1); // e4 f4 e5 f5 e6 f6 e7 f7 + temp4 = _mm_unpackhi_epi16(resq_r6_1, resq_r7_1); // g4 h4 g5 h5 g6 h6 g7 h7 + resq_r4_1 = _mm_unpacklo_epi32(temp1, temp3); // e0 f0 g0 h0 e1 f1 g1 h1 + resq_r5_1 = _mm_unpackhi_epi32(temp1, temp3); // e2 f2 g2 h2 e3 f3 g3 h3 + resq_r6_1 = _mm_unpacklo_epi32(temp2, temp4); // e4 f4 g4 h4 e5 f5 g5 h5 + resq_r7_1 = 
_mm_unpackhi_epi32(temp2, temp4); // e6 f6 g6 h6 e7 f7 g7 h7 + /* + * a0 b0 c0 d0 a1 b1 c1 d1 + * a2 b2 c2 d2 a3 b3 c3 d3 + * a4 b4 c4 d4 a5 b5 c5 d5 + * a6 b6 c6 d6 a7 b7 c7 d7 + * e0 f0 g0 h0 e1 f1 g1 h1 + * e2 f2 g2 h2 e3 f3 g3 h3 + * e4 f4 g4 h4 e5 f5 g5 h5 + * e6 f6 g6 h6 e7 f7 g7 h7 + */ + resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1); // a0 b0 c0 d0 e0 f0 g0 h0 + resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1); // a1 b1 c1 d1 e1 f1 g1 h1 + resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1); // a2 b2 c2 d2 e2 f2 g2 h2 + resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1); // a3 b3 c3 d3 e3 f3 g3 h3 + resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1); // a4 b4 c4 d4 e4 f4 g4 h4 + resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1); // a5 b5 c5 d5 e5 f5 g5 h5 + resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1); // a6 b6 c6 d6 e6 f6 g6 h6 + resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1); // a7 b7 c7 d7 e7 f7 g7 h7 + + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2); + resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg); // a1 b1 c1 d1 -- 32 bit + resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg); // e1 f1 g1 h1 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2); + resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg); // a3 b3 c3 d3 -- 32 bit + resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg); // e3 f3 g3 h3 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2); + resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg); // a5 b5 c5 d5 -- 32 bit + resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg); // e5 f5 g5 h5 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2); + resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg); // a7 b7 c7 d7 -- 32 bit + resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg); // e7 f7 g7 h7 -- 32 bit + // Transform starts -- horizontal transform + /*------------------------------------------------------------------*/ + /* y0 = w0 + w4 */ + temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2); 
+ /* y2 = w0 - w4 */ + temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2); + /* y1 = -w3 + w5 - w7 - (w7 >> 1) */ + temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1); //-w3+w5 + temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2); + temp4 = _mm_sub_epi32(temp2, resq_r7_1); //-w3+w5-w7 + temp12 = _mm_sub_epi32(temp10, resq_r7_2); + temp5 = _mm_srai_epi32(resq_r7_1, 1); // w7>>1 + temp13 = _mm_srai_epi32(resq_r7_2, 1); + temp2 = _mm_sub_epi32(temp4, temp5); //-w3+w5-w7 -(w7>>1) + temp10 = _mm_sub_epi32(temp12, temp13); + temp2 = _mm_packs_epi32(temp2, temp10); + /* y3 = w1 + w7 - w3 - (w3 >> 1) */ + temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1); // w1+w7 + temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2); + temp4 = _mm_sub_epi32(temp4, resq_r3_1); // w1+w7-w3 + temp12 = _mm_sub_epi32(temp12, resq_r3_2); + temp5 = _mm_srai_epi32(resq_r3_1, 1); // w3>>1 + temp13 = _mm_srai_epi32(resq_r3_2, 1); + temp4 = _mm_sub_epi32(temp4, temp5); // w1+w7-w3-(w3>>1) + temp12 = _mm_sub_epi32(temp12, temp13); + temp4 = _mm_packs_epi32(temp4, temp12); + /* y4 = (w2 >> 1) - w6 */ + temp5 = _mm_srai_epi16(resq_r2_2, 1); // w2>>1 + temp5 = _mm_sub_epi16(temp5, resq_r6_2); //(w2>>1)-w6 + /* y5 = -w1 + w7 + w5 + (w5 >> 1) */ + temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1); // w7-w1 + temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2); + temp6 = _mm_add_epi32(temp6, resq_r5_1); // w7-w1+w5 + temp14 = _mm_add_epi32(temp14, resq_r5_2); + temp7 = _mm_srai_epi32(resq_r5_1, 1); // w5>>1 + temp15 = _mm_srai_epi32(resq_r5_2, 1); + temp6 = _mm_add_epi32(temp6, temp7); // w7-w1_w5+(w5>>1) + temp14 = _mm_add_epi32(temp14, temp15); + temp6 = _mm_packs_epi32(temp6, temp14); + /* y6 = w2 + (w6 >> 1) */ + temp7 = _mm_srai_epi16(resq_r6_2, 1); // w6>>1 + temp7 = _mm_add_epi16(temp7, resq_r2_2); //(w6>>1)+w2 + /* y7 = w3 + w5 + w1 + (w1 >> 1) */ + temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1); // w3+w5 + temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2); + temp8 = _mm_add_epi32(temp8, resq_r1_1); // w3+w5+w1 + temp16 = _mm_add_epi32(temp16, 
resq_r1_2); + temp17 = _mm_srai_epi32(resq_r1_1, 1); // w1>>1 + temp18 = _mm_srai_epi32(resq_r1_2, 1); + temp8 = _mm_add_epi32(temp8, temp17); // w3+w5+w1+(w1>>1) + temp16 = _mm_add_epi32(temp16, temp18); + temp8 = _mm_packs_epi32(temp8, temp16); + /*------------------------------------------------------------------*/ + /*------------------------------------------------------------------*/ + /* z0 = y0 + y6 */ + resq_r0_1 = _mm_add_epi16(temp1, temp7); + /* z1 = y1 + (y7 >> 2) */ + resq_r1_1 = _mm_srai_epi16(temp8, 2); + resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2); + /* z2 = y2 + y4 */ + resq_r2_1 = _mm_add_epi16(temp3, temp5); + /* z3 = y3 + (y5 >> 2) */ + resq_r3_1 = _mm_srai_epi16(temp6, 2); + resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4); + /* z4 = y2 - y4 */ + resq_r4_1 = _mm_sub_epi16(temp3, temp5); + /* z5 = (y3 >> 2) - y5 */ + resq_r5_1 = _mm_srai_epi16(temp4, 2); + resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6); + /* z6 = y0 - y6 */ + resq_r6_1 = _mm_sub_epi16(temp1, temp7); + /* z7 = y7 - (y1 >> 2) */ + resq_r7_1 = _mm_srai_epi16(temp2, 2); + resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1); + /*------------------------------------------------------------------*/ + /*------------------------------------------------------------------*/ + /* x0 = z0 + z7 */ + temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1); + /* x1 = z2 + z5 */ + temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1); + /* x2 = z4 + z3 */ + temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1); + /* x3 = z6 + z1 */ + temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1); + /* x4 = z6 - z1 */ + temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1); + /* x5 = z4 - z3 */ + temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1); + /* x6 = z2 - z5 */ + temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1); + /* x7 = z0 - z7 */ + temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1); + /*------------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 b0 c0 d0 e0 f0 g0 h0 + * a1 b1 c1 d1 e1 f1 g1 h1 + * a2 b2 c2 d2 e2 f2 g2 h2 + * a3 b3 c3 d3 
e3 f3 g3 h3 + */ + temp17 = _mm_unpacklo_epi16(temp1, temp2); // a0 a1 b0 b1 c0 c1 d0 d1 + temp19 = _mm_unpacklo_epi16(temp3, temp4); // a2 a3 b2 b3 c2 c3 d2 d3 + temp18 = _mm_unpackhi_epi16(temp1, temp2); // e0 e1 f0 f1 g0 g1 h0 h1 + temp20 = _mm_unpackhi_epi16(temp3, temp4); // e2 e3 f2 f3 g2 g3 h2 h3 + + resq_r0_1 = _mm_unpacklo_epi32(temp17, temp19); // a0 a1 a2 a3 b0 b1 b2 b3 + resq_r1_1 = _mm_unpackhi_epi32(temp17, temp19); // c0 c1 c2 c3 d0 d1 d2 d3 + resq_r2_1 = _mm_unpacklo_epi32(temp18, temp20); // e0 e1 e2 e3 f0 f1 f2 f3 + resq_r3_1 = _mm_unpackhi_epi32(temp18, temp20); // g0 g2 g2 g3 h0 h1 h2 h3 + /* + * a4 b4 c4 d4 e4 f4 g4 h4 + * a5 b5 c5 d5 e5 f5 g5 h5 + * a6 b6 c6 d6 e6 f6 g6 h6 + * a7 b7 c7 d7 e7 f7 g7 h7 + */ + temp17 = _mm_unpacklo_epi16(temp5, temp6); // a4 a5 b4 b5 c4 c5 d4 d5 + temp19 = _mm_unpacklo_epi16(temp7, temp8); // a6 a7 b6 b7 c6 c7 d6 d7 + temp18 = _mm_unpackhi_epi16(temp5, temp6); // e4 e5 f4 f5 g4 g5 h4 h5 + temp20 = _mm_unpackhi_epi16(temp7, temp8); // e6 e7 f6 f7 g6 g7 h6 h7 + + resq_r4_1 = _mm_unpacklo_epi32(temp17, temp19); // a4 a5 a6 a7 b4 b5 b6 b7 + resq_r5_1 = _mm_unpackhi_epi32(temp17, temp19); // c4 c5 c6 c7 d4 d5 d6 d7 + resq_r6_1 = _mm_unpacklo_epi32(temp18, temp20); // e4 e5 e6 e7 f4 f5 f6 f7 + resq_r7_1 = _mm_unpackhi_epi32(temp18, temp20); // g4 g5 g6 g7 h4 h5 h6 h7 + /* a0 a1 a2 a3 b0 b1 b2 b3 + * c0 c1 c2 c3 d0 d1 d2 d3 + * e0 e1 e2 e3 f0 f1 f2 f3 + * g0 g2 g2 g3 h0 h1 h2 h3 + * a4 a5 a6 a7 b4 b5 b6 b7 + * c4 c5 c6 c7 d4 d5 d6 d7 + * e4 e5 e6 e7 f4 f5 f6 f7 + * g4 g5 g6 g7 h4 h5 h6 h7 + */ + resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1); // a0 a1 a2 a3 a4 a5 a6 a7 + resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1); // b0 b1 b2 b3 b4 b5 b6 b7 + resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1); // c0 c1 c2 c3 c4 c5 c6 c7 + resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1); // d0 d1 d2 d3 d4 d5 d6 d7 + resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1); // e0 e1 e2 e3 e4 e5 e6 e7 + resq_r5_2 = 
_mm_unpackhi_epi64(resq_r2_1, resq_r6_1); // f0 f1 f2 f3 f4 f5 f6 f7 + resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1); // g0 g1 g2 g3 g4 g5 g6 g7 + resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1); // h0 h1 h2 h3 h4 h5 h6 h7 + + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2); + resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg); // a1 b1 c1 d1 -- 32 bit + resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg); // e1 f1 g1 h1 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2); + resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg); // a3 b3 c3 d3 -- 32 bit + resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg); // e3 f3 g3 h3 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2); + resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg); // a5 b5 c5 d5 -- 32 bit + resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg); // e5 f5 g5 h5 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2); + resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg); // a7 b7 c7 d7 -- 32 bit + resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg); // e7 f7 g7 h7 -- 32 bit + + zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + // Load pred buffer row 0 + predload_r = + _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 + // -- all 8 bits + pred_r0_1 = + _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + // Load pred buffer row 1 + predload_r = + _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0 + // 0 0 0 0 0 0 -- all 8 bits + pred_r1_1 = + _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + // Load pred buffer row 2 + predload_r = _mm_loadl_epi64( + (__m128i *) (&pu1_pred[2 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0 + // 0 0 0 0 0 0 -- all 8 bits + pred_r2_1 = + _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + // Load pred buffer row 3 + predload_r = 
_mm_loadl_epi64( + (__m128i *) (&pu1_pred[3 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0 + // 0 0 0 0 0 0 -- all 8 bits + pred_r3_1 = + _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + // Load pred buffer row 4 + predload_r = _mm_loadl_epi64( + (__m128i *) (&pu1_pred[4 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0 + // 0 0 0 0 0 0 -- all 8 bits + pred_r4_1 = + _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + // Load pred buffer row 5 + predload_r = + _mm_loadl_epi64((__m128i *) (&pu1_pred[5 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 + // 0 0 0 0 0 0 0 -- all 8 bit + pred_r5_1 = + _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + // Load pred buffer row 6 + predload_r = _mm_loadl_epi64( + (__m128i *) (&pu1_pred[6 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0 + // 0 0 0 0 0 0 -- all 8 bits + pred_r6_1 = + _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + // Load pred buffer row 7 + predload_r = _mm_loadl_epi64( + (__m128i *) (&pu1_pred[7 * i4_pred_stride])); // p0 p1 p2 p3 p4 p5 p6 p7 0 0 + // 0 0 0 0 0 0 -- all 8 bits + pred_r7_1 = + _mm_unpacklo_epi8(predload_r, zero_8x16b); // p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + + /*--------------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to reconstructed frame buffer */ + /* [Prediction buffer itself in this case] */ + /*--------------------------------------------------------------------*/ + + /* y0j = w0j + w4j */ + temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2); + /* y2j = w0j - w4j */ + temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2); + /* y1j = -w3j + w5j - w7j - (w7j >> 1) */ + temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1); //-w3+w5 + temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2); + temp4 = _mm_sub_epi32(temp2, resq_r7_1); 
//-w3+w5-w7 + temp12 = _mm_sub_epi32(temp10, resq_r7_2); + temp5 = _mm_srai_epi32(resq_r7_1, 1); // w7>>1 + temp13 = _mm_srai_epi32(resq_r7_2, 1); + temp2 = _mm_sub_epi32(temp4, temp5); //-w3+w5-w7 -(w7>>1) + temp10 = _mm_sub_epi32(temp12, temp13); + temp2 = _mm_packs_epi32(temp2, temp10); + /* y3j = w1j + w7j - w3j - (w3j >> 1) */ + temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1); // w1+w7 + temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2); + temp4 = _mm_sub_epi32(temp4, resq_r3_1); // w1+w7-w3 + temp12 = _mm_sub_epi32(temp12, resq_r3_2); + temp5 = _mm_srai_epi32(resq_r3_1, 1); // w3>>1 + temp13 = _mm_srai_epi32(resq_r3_2, 1); + temp4 = _mm_sub_epi32(temp4, temp5); // w1+w7-w3-(w3>>1) + temp12 = _mm_sub_epi32(temp12, temp13); + temp4 = _mm_packs_epi32(temp4, temp12); + /* y4j = (w2j >> 1) - w6j */ + temp5 = _mm_srai_epi16(resq_r2_2, 1); // w2>>1 + temp5 = _mm_sub_epi16(temp5, resq_r6_2); //(w2>>1)-w6 + /* y5j = -w1j + w7j + w5j + (w5j >> 1) */ + temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1); // w7-w1 + temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2); + temp6 = _mm_add_epi32(temp6, resq_r5_1); // w7-w1+w5 + temp14 = _mm_add_epi32(temp14, resq_r5_2); + temp7 = _mm_srai_epi32(resq_r5_1, 1); // w5>>1 + temp15 = _mm_srai_epi32(resq_r5_2, 1); + temp6 = _mm_add_epi32(temp6, temp7); // w7-w1_w5+(w5>>1) + temp14 = _mm_add_epi32(temp14, temp15); + temp6 = _mm_packs_epi32(temp6, temp14); + /* y6j = w2j + (w6j >> 1) */ + temp7 = _mm_srai_epi16(resq_r6_2, 1); // w6>>1 + temp7 = _mm_add_epi16(temp7, resq_r2_2); //(w6>>1)+w2 + /* y7j = w3j + w5j + w1j + (w1j >> 1) */ + temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1); // w3+w5 + temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2); + temp8 = _mm_add_epi32(temp8, resq_r1_1); // w3+w5+w1 + temp16 = _mm_add_epi32(temp16, resq_r1_2); + temp17 = _mm_srai_epi32(resq_r1_1, 1); // w1>>1 + temp18 = _mm_srai_epi32(resq_r1_2, 1); + temp8 = _mm_add_epi32(temp8, temp17); // w3+w5+w1+(w1>>1) + temp16 = _mm_add_epi32(temp16, temp18); + temp8 = _mm_packs_epi32(temp8, 
temp16); + /*------------------------------------------------------------------*/ + /*------------------------------------------------------------------*/ + /* z0j = y0j + y6j */ + resq_r0_1 = _mm_add_epi16(temp1, temp7); + /* z1j = y1j + (y7j >> 2) */ + resq_r1_1 = _mm_srai_epi16(temp8, 2); + resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2); + /* z2j = y2j + y4j */ + resq_r2_1 = _mm_add_epi16(temp3, temp5); + /* z3j = y3j + (y5j >> 2) */ + resq_r3_1 = _mm_srai_epi16(temp6, 2); + resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4); + /* z4j = y2j - y4j */ + resq_r4_1 = _mm_sub_epi16(temp3, temp5); + /* z5j = (y3j >> 2) - y5j */ + resq_r5_1 = _mm_srai_epi16(temp4, 2); + resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6); + /* z6j = y0j - y6j */ + resq_r6_1 = _mm_sub_epi16(temp1, temp7); + /* z7j = y7j - (y1j >> 2) */ + resq_r7_1 = _mm_srai_epi16(temp2, 2); + resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1); + /*------------------------------------------------------------------*/ + + /*------------------------------------------------------------------*/ + /* x0j = z0j + z7j */ + temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp1); + temp10 = _mm_unpacklo_epi16(temp1, sign_reg); + temp11 = _mm_unpackhi_epi16(temp1, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp1 = _mm_add_epi16(temp10, pred_r0_1); + /* x1j = z2j + z5j */ + temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp2); + temp10 = _mm_unpacklo_epi16(temp2, sign_reg); + temp11 = _mm_unpackhi_epi16(temp2, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp2 = _mm_add_epi16(temp10, pred_r1_1); + /* x2j = z4j + z3j 
*/ + temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp3); + temp10 = _mm_unpacklo_epi16(temp3, sign_reg); + temp11 = _mm_unpackhi_epi16(temp3, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp3 = _mm_add_epi16(temp10, pred_r2_1); + /* x3j = z6j + z1j */ + temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp4); + temp10 = _mm_unpacklo_epi16(temp4, sign_reg); + temp11 = _mm_unpackhi_epi16(temp4, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp4 = _mm_add_epi16(temp10, pred_r3_1); + /* x4j = z6j - z1j */ + temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp5); + temp10 = _mm_unpacklo_epi16(temp5, sign_reg); + temp11 = _mm_unpackhi_epi16(temp5, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp5 = _mm_add_epi16(temp10, pred_r4_1); + /* x5j = z4j - z3j */ + temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp6); + temp10 = _mm_unpacklo_epi16(temp6, sign_reg); + temp11 = _mm_unpackhi_epi16(temp6, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp6 = _mm_add_epi16(temp10, pred_r5_1); + /* x6j = z2j - z5j */ + temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp7); + temp10 = 
_mm_unpacklo_epi16(temp7, sign_reg); + temp11 = _mm_unpackhi_epi16(temp7, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp7 = _mm_add_epi16(temp10, pred_r6_1); + /* x7j = z0j - z7j */ + temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp8); + temp10 = _mm_unpacklo_epi16(temp8, sign_reg); + temp11 = _mm_unpackhi_epi16(temp8, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp8 = _mm_add_epi16(temp10, pred_r7_1); + /*------------------------------------------------------------------*/ + // Clipping the results to 8 bits + sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); // sign check + temp1 = _mm_and_si128(temp1, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp2, zero_8x16b); // sign check + temp2 = _mm_and_si128(temp2, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp3, zero_8x16b); // sign check + temp3 = _mm_and_si128(temp3, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); // sign check + temp4 = _mm_and_si128(temp4, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); // sign check + temp5 = _mm_and_si128(temp5, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp6, zero_8x16b); // sign check + temp6 = _mm_and_si128(temp6, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp7, zero_8x16b); // sign check + temp7 = _mm_and_si128(temp7, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp8, zero_8x16b); // sign check + temp8 = _mm_and_si128(temp8, sign_reg); + + resq_r0_2 = _mm_packus_epi16(temp1, zero_8x16b); + resq_r1_2 = _mm_packus_epi16(temp2, zero_8x16b); + resq_r2_2 = _mm_packus_epi16(temp3, zero_8x16b); + resq_r3_2 = _mm_packus_epi16(temp4, zero_8x16b); + resq_r4_2 = _mm_packus_epi16(temp5, 
zero_8x16b); + resq_r5_2 = _mm_packus_epi16(temp6, zero_8x16b); + resq_r6_2 = _mm_packus_epi16(temp7, zero_8x16b); + resq_r7_2 = _mm_packus_epi16(temp8, zero_8x16b); + + _mm_storel_epi64((__m128i *) (&pu1_out[0]), resq_r0_2); + _mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), resq_r1_2); + _mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), resq_r2_2); + _mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), resq_r3_2); + _mm_storel_epi64((__m128i *) (&pu1_out[4 * i4_out_stride]), resq_r4_2); + _mm_storel_epi64((__m128i *) (&pu1_out[5 * i4_out_stride]), resq_r5_2); + _mm_storel_epi64((__m128i *) (&pu1_out[6 * i4_out_stride]), resq_r6_2); + _mm_storel_epi64((__m128i *) (&pu1_out[7 * i4_out_stride]), resq_r7_2); +} diff --git a/common/x86/svc/isvc_mem_fns_sse42.c b/common/x86/svc/isvc_mem_fns_sse42.c new file mode 100644 index 0000000..cfcd249 --- /dev/null +++ b/common/x86/svc/isvc_mem_fns_sse42.c @@ -0,0 +1,157 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +/** + + * ******************************************************************************* + + * * @file + * isvc_mem_fns_sse42.c + * + * @brief + * SSE4.2 variants of + * functions used for memory operations + * + + * ******************************************************************************* + + */ +#include +#include + +#include "ih264_typedefs.h" +#include "isvc_mem_fns.h" + +void isvc_memset_2d_sse42(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd, + WORD32 i4_blk_ht) +{ + WORD32 i, j; + + if((i4_blk_wd == 4) && (i4_blk_ht == 4)) + { + *((WORD32 *) (pu1_dst)) = _mm_cvtsi128_si32(_mm_set1_epi8(u1_val)); + pu1_dst += i4_dst_stride; + + *((WORD32 *) (pu1_dst)) = _mm_cvtsi128_si32(_mm_set1_epi8(u1_val)); + pu1_dst += i4_dst_stride; + + *((WORD32 *) (pu1_dst)) = _mm_cvtsi128_si32(_mm_set1_epi8(u1_val)); + pu1_dst += i4_dst_stride; + + *((WORD32 *) (pu1_dst)) = _mm_cvtsi128_si32(_mm_set1_epi8(u1_val)); + } + else if((i4_blk_wd == 8) && (i4_blk_ht == 8)) + { + _mm_storel_epi64((__m128i *) (&pu1_dst[0]), _mm_set1_epi8(u1_val)); + pu1_dst += i4_dst_stride; + + _mm_storel_epi64((__m128i *) (&pu1_dst[0]), _mm_set1_epi8(u1_val)); + pu1_dst += i4_dst_stride; + + _mm_storel_epi64((__m128i *) (&pu1_dst[0]), _mm_set1_epi8(u1_val)); + pu1_dst += i4_dst_stride; + + _mm_storel_epi64((__m128i *) (&pu1_dst[0]), _mm_set1_epi8(u1_val)); + pu1_dst += i4_dst_stride; + + _mm_storel_epi64((__m128i *) (&pu1_dst[0]), _mm_set1_epi8(u1_val)); + pu1_dst += i4_dst_stride; + + _mm_storel_epi64((__m128i *) (&pu1_dst[0]), _mm_set1_epi8(u1_val)); + pu1_dst += i4_dst_stride; + + _mm_storel_epi64((__m128i *) (&pu1_dst[0]), _mm_set1_epi8(u1_val)); + pu1_dst += i4_dst_stride; + + _mm_storel_epi64((__m128i *) (&pu1_dst[0]), _mm_set1_epi8(u1_val)); + } + else if((i4_blk_wd % 16 == 0) && (i4_blk_ht % 16 == 0)) + { + UWORD8 *pu1_dst_col_ptr, *pu1_dst_row_ptr; + + WORD32 i4_width_by_16 = i4_blk_wd / 16; + WORD32 i4_height_by_16 = i4_blk_ht / 16; + + for(i = 
0; i < i4_height_by_16; i++) + { + pu1_dst_row_ptr = pu1_dst + i * 16 * i4_dst_stride; + + for(j = 0; j < i4_width_by_16; j++) + { + pu1_dst_col_ptr = pu1_dst_row_ptr + (j << 4); + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); + pu1_dst_col_ptr += i4_dst_stride; + + _mm_storeu_si128((__m128i *) (&pu1_dst_col_ptr[0]), _mm_set1_epi8(u1_val)); 
+ } + } + } + else + { + for(i = 0; i < i4_blk_ht; i++) + { + memset(pu1_dst, u1_val, i4_blk_wd); + + pu1_dst += i4_dst_stride; + } + } +} diff --git a/common/x86/svc/isvc_mem_fns_ssse3.c b/common/x86/svc/isvc_mem_fns_ssse3.c new file mode 100644 index 0000000..6467b8d --- /dev/null +++ b/common/x86/svc/isvc_mem_fns_ssse3.c @@ -0,0 +1,435 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +/** + ******************************************************************************* + * @file + * isvc_mem_fns_atom_intr.c + * + * @brief + * Functions used for memory operations + * + * @author + * Ittiam + * + * @par List of Functions: + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include +#include +#include +#include +#include + +#include "ih264_typedefs.h" +#include "isvc_mem_fns.h" + +#include + +/** +******************************************************************************** +* @brief copies a 2d blk from one location to another +* +* @param[out] pu1_dst : dst pointer +* +* @param[in] i4_dst_stride: stride of destination +* +* @param[in] pu1_src : src ptr +* +* @param[in] i4_src_stride: stride of src +* +* @param[in] i4_blk_wd : blk width +* +* @param[in] i4_blk_ht : blk height +* +* @return void +******************************************************************************** +*/ +void isvc_copy_2d_ssse3(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src, + WORD32 i4_src_stride, WORD32 i4_blk_wd, WORD32 i4_blk_ht) +{ + WORD32 i, j; + /* all 128 bit registers are named with a suffix mxnb, where m is the */ + /* number of n bits packed in the register */ + + if(0 == (i4_blk_wd & 31)) /* wd multiple of 32 case */ + { + __m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b; + __m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b; + + if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */ + { + __m128i src8_16x8b, src9_16x8b, src10_16x8b, src11_16x8b; + __m128i src12_16x8b, src13_16x8b, src14_16x8b, src15_16x8b; + + for(i = 0; i < i4_blk_ht; i += 8) + { + for(j = 0; j < i4_blk_wd; j += 32) + { + src0_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src)); // i = 0 + src1_16x8b = + 
_mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride)); // i = 1 + src2_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2 + src3_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3 + src4_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 4 * i4_src_stride)); // i = 4 + src5_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 5 * i4_src_stride)); // i = 5 + src6_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 6 * i4_src_stride)); // i = 6 + src7_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 7 * i4_src_stride)); // i = 7 + /* Add 16 as offset */ + src8_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 16)); // i = 0 + src9_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride + 16)); // i = 1 + src10_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride + 16)); // i = 2 + src11_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride + 16)); // i = 3 + src12_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 4 * i4_src_stride + 16)); // i = 4 + src13_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 5 * i4_src_stride + 16)); // i = 5 + src14_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 6 * i4_src_stride + 16)); // i = 6 + src15_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 7 * i4_src_stride + 16)); // i = 7 + + _mm_storeu_si128((__m128i *) (pu1_dst), src0_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride), src1_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 4 * i4_dst_stride), src4_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 5 * i4_dst_stride), src5_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 6 * i4_dst_stride), src6_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 7 * i4_dst_stride), src7_16x8b); + + _mm_storeu_si128((__m128i *) (pu1_dst + 16), src8_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 
i4_dst_stride + 16), src9_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride + 16), src10_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride + 16), src11_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 4 * i4_dst_stride + 16), src12_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 5 * i4_dst_stride + 16), src13_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 6 * i4_dst_stride + 16), src14_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 7 * i4_dst_stride + 16), src15_16x8b); + + pu1_src += 32; + pu1_dst += 32; + } + + pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride; + pu1_dst = pu1_dst - i4_blk_wd + 8 * i4_dst_stride; + } + } + else /* ht multiple of 4 case */ + { + for(i = 0; i < i4_blk_ht; i += 4) + { + for(j = 0; j < i4_blk_wd; j += 32) + { + src0_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src)); // i = 0 + src1_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride)); // i = 1 + src2_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2 + src3_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3 + /* Add 16 as offset */ + src4_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 16)); // i = 0 + src5_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + i4_src_stride + 16)); // i = 1 + src6_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride + 16)); // i = 2 + src7_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride + 16)); // i = 3 + + _mm_storeu_si128((__m128i *) (pu1_dst), src0_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride), src1_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 16), src4_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + i4_dst_stride + 16), src5_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride + 16), src6_16x8b); + 
_mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride + 16), src7_16x8b); + + pu1_src += 32; + pu1_dst += 32; + } + + pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride; + pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride; + } + } + } + else if(0 == (i4_blk_wd & 15)) /* wd multiple of 16 case */ + { + __m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b; + + if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */ + { + __m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b; + + for(i = 0; i < i4_blk_ht; i += 8) + { + for(j = 0; j < i4_blk_wd; j += 16) + { + src0_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0 + src1_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1 + src2_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2 + src3_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3 + src4_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 4 * i4_src_stride)); // i = 4 + src5_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 5 * i4_src_stride)); // i = 5 + src6_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 6 * i4_src_stride)); // i = 6 + src7_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 7 * i4_src_stride)); // i = 7 + + _mm_storeu_si128((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 4 * i4_dst_stride), src4_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 5 * i4_dst_stride), src5_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 6 * i4_dst_stride), src6_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 7 * i4_dst_stride), src7_16x8b); + + pu1_src += 16; + pu1_dst += 16; + } + + pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride; + pu1_dst = pu1_dst - i4_blk_wd + 8 * 
i4_dst_stride; + } + } + else /* ht multiple of 4 case */ + { + for(i = 0; i < i4_blk_ht; i += 4) + { + for(j = 0; j < i4_blk_wd; j += 16) + { + src0_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0 + src1_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1 + src2_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2 + src3_16x8b = + _mm_loadu_si128((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3 + + _mm_storeu_si128((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b); + _mm_storeu_si128((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b); + + pu1_src += 16; + pu1_dst += 16; + } + + pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride; + pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride; + } + } + } + else if(0 == (i4_blk_wd & 7)) /* wd multiple of 8 case */ + { + __m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b; + + if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */ + { + __m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b; + + for(i = 0; i < i4_blk_ht; i += 8) + { + for(j = 0; j < i4_blk_wd; j += 8) + { + src0_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0 + src1_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1 + src2_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2 + src3_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3 + src4_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 4 * i4_src_stride)); // i = 4 + src5_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 5 * i4_src_stride)); // i = 5 + src6_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 6 * i4_src_stride)); // i = 6 + src7_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 7 * i4_src_stride)); // i = 7 + + _mm_storel_epi64((__m128i *) 
(pu1_dst + 0 * i4_dst_stride), src0_16x8b); + _mm_storel_epi64((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b); + _mm_storel_epi64((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b); + _mm_storel_epi64((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b); + _mm_storel_epi64((__m128i *) (pu1_dst + 4 * i4_dst_stride), src4_16x8b); + _mm_storel_epi64((__m128i *) (pu1_dst + 5 * i4_dst_stride), src5_16x8b); + _mm_storel_epi64((__m128i *) (pu1_dst + 6 * i4_dst_stride), src6_16x8b); + _mm_storel_epi64((__m128i *) (pu1_dst + 7 * i4_dst_stride), src7_16x8b); + + pu1_src += 8; + pu1_dst += 8; + } + + pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride; + pu1_dst = pu1_dst - i4_blk_wd + 8 * i4_dst_stride; + } + } + else /* ht multiple of 4 case */ + { + for(i = 0; i < i4_blk_ht; i += 4) + { + for(j = 0; j < i4_blk_wd; j += 8) + { + src0_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0 + src1_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1 + src2_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2 + src3_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3 + + _mm_storel_epi64((__m128i *) (pu1_dst + 0 * i4_dst_stride), src0_16x8b); + _mm_storel_epi64((__m128i *) (pu1_dst + 1 * i4_dst_stride), src1_16x8b); + _mm_storel_epi64((__m128i *) (pu1_dst + 2 * i4_dst_stride), src2_16x8b); + _mm_storel_epi64((__m128i *) (pu1_dst + 3 * i4_dst_stride), src3_16x8b); + + pu1_src += 8; + pu1_dst += 8; + } + + pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride; + pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride; + } + } + } + else /* wd multiple of 4 case */ + { + __m128i src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b; + WORD32 src0, src1, src2, src3; + if(0 == (i4_blk_ht & 7)) /* ht multiple of 8 case */ + { + __m128i src4_16x8b, src5_16x8b, src6_16x8b, src7_16x8b; + WORD32 src4, src5, src6, src7; + + for(i = 0; i < i4_blk_ht; i += 8) + { + for(j = 0; j < i4_blk_wd; j 
+= 4) + { + src0_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0 + src1_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1 + src2_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2 + src3_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3 + src4_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 4 * i4_src_stride)); // i = 4 + src5_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 5 * i4_src_stride)); // i = 5 + src6_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 6 * i4_src_stride)); // i = 6 + src7_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 7 * i4_src_stride)); // i = 7 + + src0 = _mm_cvtsi128_si32(src0_16x8b); + src1 = _mm_cvtsi128_si32(src1_16x8b); + src2 = _mm_cvtsi128_si32(src2_16x8b); + src3 = _mm_cvtsi128_si32(src3_16x8b); + src4 = _mm_cvtsi128_si32(src4_16x8b); + src5 = _mm_cvtsi128_si32(src5_16x8b); + src6 = _mm_cvtsi128_si32(src6_16x8b); + src7 = _mm_cvtsi128_si32(src7_16x8b); + + *(WORD32 *) (&pu1_dst[0 * i4_dst_stride]) = src0; + *(WORD32 *) (&pu1_dst[1 * i4_dst_stride]) = src1; + *(WORD32 *) (&pu1_dst[2 * i4_dst_stride]) = src2; + *(WORD32 *) (&pu1_dst[3 * i4_dst_stride]) = src3; + *(WORD32 *) (&pu1_dst[4 * i4_dst_stride]) = src4; + *(WORD32 *) (&pu1_dst[5 * i4_dst_stride]) = src5; + *(WORD32 *) (&pu1_dst[6 * i4_dst_stride]) = src6; + *(WORD32 *) (&pu1_dst[7 * i4_dst_stride]) = src7; + + pu1_src += 4; + pu1_dst += 4; + } + + pu1_src = pu1_src - i4_blk_wd + 8 * i4_src_stride; + pu1_dst = pu1_dst - i4_blk_wd + 8 * i4_dst_stride; + } + } + else /* ht multiple of 4 case */ + { + for(i = 0; i < i4_blk_ht; i += 4) + { + for(j = 0; j < i4_blk_wd; j += 4) + { + src0_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 0 * i4_src_stride)); // i = 0 + src1_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 1 * i4_src_stride)); // i = 1 + src2_16x8b = + _mm_loadl_epi64((__m128i *) (pu1_src + 2 * i4_src_stride)); // i = 2 + src3_16x8b = + 
_mm_loadl_epi64((__m128i *) (pu1_src + 3 * i4_src_stride)); // i = 3 + + src0 = _mm_cvtsi128_si32(src0_16x8b); + src1 = _mm_cvtsi128_si32(src1_16x8b); + src2 = _mm_cvtsi128_si32(src2_16x8b); + src3 = _mm_cvtsi128_si32(src3_16x8b); + + *(WORD32 *) (&pu1_dst[0 * i4_dst_stride]) = src0; + *(WORD32 *) (&pu1_dst[1 * i4_dst_stride]) = src1; + *(WORD32 *) (&pu1_dst[2 * i4_dst_stride]) = src2; + *(WORD32 *) (&pu1_dst[3 * i4_dst_stride]) = src3; + + pu1_src += 4; + pu1_dst += 4; + } + + pu1_src = pu1_src - i4_blk_wd + 4 * i4_src_stride; + pu1_dst = pu1_dst - i4_blk_wd + 4 * i4_dst_stride; + } + } + } +} diff --git a/common/x86/svc/isvc_padding_ssse3.c b/common/x86/svc/isvc_padding_ssse3.c new file mode 100644 index 0000000..3866301 --- /dev/null +++ b/common/x86/svc/isvc_padding_ssse3.c @@ -0,0 +1,294 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvc_padding_ssse3.c +* +* @brief +* Contains function definitions for Padding +* +* @author +* Srinivas T +* +* @par List of Functions: +* - isvc_pad_left_luma_ssse3() +* - isvc_pad_left_chroma_ssse3() +* - isvc_pad_right_luma_ssse3() +* - isvc_pad_right_chroma_ssse3() +* +* @remarks +* None +* +******************************************************************************* +*/ + +#include +#include +#include "ih264_typedefs.h" +#include "ih264_platform_macros.h" +#include "isvc_mem_fns.h" +#include "ih264_debug.h" + +#include +
/**
*******************************************************************************
*
* @brief
*  Pads a luma plane on its left side
*
* @par Description:
*  For each of the ht rows, the byte at the row start (pu1_src for the first
*  row) is replicated into the pad_size bytes immediately preceding it.
*  The padding is written 8 bytes per store, so pad_size has to be a multiple
*  of 8.
*
* @param[in] pu1_src
*  Pointer to the left-most sample of the first row; the bytes
*  [pu1_src - pad_size, pu1_src) of every row are overwritten
*
* @param[in] src_strd
*  Offset, in bytes, from one row to the next
*
* @param[in] ht
*  Number of rows to pad
*
* @param[in] pad_size
*  Number of bytes to write per row; asserted to be a multiple of 8
*
* @returns
*  None
*
* @remarks
*  None
*
*******************************************************************************
*/
void isvc_pad_left_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
{
    UWORD8 *pu1_row = pu1_src;
    WORD32 i4_rows_left;

    ASSERT(pad_size % 8 == 0);

    for(i4_rows_left = ht; i4_rows_left > 0; i4_rows_left--)
    {
        /* Every byte lane holds the left-most sample of this row */
        const __m128i edge_16x8b = _mm_set1_epi8(*pu1_row);
        UWORD8 *pu1_pad = pu1_row - pad_size;

        /* Fill [row - pad_size, row) with 8-byte stores */
        while(pu1_pad < pu1_row)
        {
            _mm_storel_epi64((__m128i *) pu1_pad, edge_16x8b);
            pu1_pad += 8;
        }

        pu1_row += src_strd;
    }
}

+/** +******************************************************************************* +* +* @brief +* Padding (chroma block) at
the left of a 2d array +* +* @par Description: +* The left column of a 2d array is replicated for pad_size times at the left +* +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] src_strd +* integer source stride +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array (each colour component) +* +* @param[in] pad_size +* integer -padding size of the array +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ + +void isvc_pad_left_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size) +{ + WORD32 row; + WORD32 col; + UWORD8 *pu1_dst; + + ASSERT(pad_size % 8 == 0); + for(row = 0; row < ht; row++) + { + __m128i src_temp0_16x8b; + + pu1_dst = pu1_src - pad_size; + src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *) pu1_src)); + for(col = 0; col < pad_size; col += 8) + { + _mm_storel_epi64((__m128i *) (pu1_dst + col), src_temp0_16x8b); + } + pu1_src += src_strd; + } +} + +/** +******************************************************************************* +* +* @brief +* Padding (luma block) at the right of a 2d array +* +* @par Description: +* The right column of a 2d array is replicated for pad_size times at the right +* +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] src_strd +* integer source stride +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array +* +* @param[in] pad_size +* integer -padding size of the array +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ + +void isvc_pad_right_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size) +{ + WORD32 
row; + WORD32 col; + UWORD8 *pu1_dst; + + ASSERT(pad_size % 8 == 0); + + for(row = 0; row < ht; row++) + { + __m128i src_temp0_16x8b; + + pu1_dst = pu1_src; + src_temp0_16x8b = _mm_set1_epi8(*(pu1_src - 1)); + for(col = 0; col < pad_size; col += 8) + { + _mm_storel_epi64((__m128i *) (pu1_dst + col), src_temp0_16x8b); + } + pu1_src += src_strd; + } +} + +/** +******************************************************************************* +* +* @brief +* Padding (chroma block) at the right of a 2d array +* +* @par Description: +* The right column of a 2d array is replicated for pad_size times at the right +* +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] src_strd +* integer source stride +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array (each colour component) +* +* @param[in] pad_size +* integer -padding size of the array +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ + +void isvc_pad_right_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size) +{ + WORD32 row; + WORD32 col; + UWORD8 *pu1_dst; + + ASSERT(pad_size % 8 == 0); + + for(row = 0; row < ht; row++) + { + __m128i src_temp0_16x8b; + + pu1_dst = pu1_src; + src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *) (pu1_src - 2))); + for(col = 0; col < pad_size; col += 8) + { + _mm_storel_epi64((__m128i *) (pu1_dst + col), src_temp0_16x8b); + } + + pu1_src += src_strd; + } +} diff --git a/common/x86/svc/isvc_resi_trans_quant_sse42.c b/common/x86/svc/isvc_resi_trans_quant_sse42.c new file mode 100644 index 0000000..d9832f0 --- /dev/null +++ b/common/x86/svc/isvc_resi_trans_quant_sse42.c @@ -0,0 +1,1881 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** + + * ******************************************************************************* + + * * @file + * isvc_resi_trans_quant_sse42.c + * + * @brief + * Contains function + * definitions for the single stage forward transform for H.264 + * It will calculate + * the residue, apply the core forward transform and then do quantization + * + * @author + * Mohit + * [100664] + * + * @par List of Functions: + * - + * isvc_resi_trans_quant_4x4_sse42() + * - + * isvc_resi_trans_quant_chroma_4x4_sse42() + * + * @remarks + * None + * + + * ******************************************************************************* + + */ +#include + +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" + +/** +******************************************************************************* +* +* +* @brief +* This function performs forward transform and quantization on a 4*4 +* block +* +* @par Description: +* The function accepts source buffer and +* estimation buffer. From these, it +* computes the residue.
This is residue +* is then transformed and quantized. +* The transform and quantization are in +* placed computed. They use the residue +* buffer for this. +* +* @param[in] +* pu1_src +* Pointer to source sub-block +* +* @param[in] pu1_pred +* Pointer +* to prediction sub-block +* +* @param[in] pi2_out +* Pointer to residual +* sub-block +* +* @param[in] i4_src_stride +* Source stride +* +* @param[in] +* i4_pred_stride +* Prediction stride +* +* @param[in] dst_strd +* Destination +* stride +* +* @param[in] u4_qbits +* QP_BITS_h264_4x4 + floor(QP/6) +* +* +* @param[in] pu2_threshold_matrix +* Pointer to Forward Quant Threshold +* Matrix +* +* @param[in] pu2_scale_matrix +* Pointer to Forward Quant Scale +* Matrix +* +* @param[in] u4_round_factor +* Quantization Round factor +* +* +* @param[out] pu1_nnz +* Total non-zero coefficients in the current +* sub-block +* +* @returns +* +* @remarks +* +* None +* +******************************************************************************* +*/ +void isvc_resi_trans_quant_4x4_sse42(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_out, + buffer_container_t *ps_upsampled_res, + resi_trans_quant_constants_t *ps_quant_constants, + UWORD8 *pu1_nnz, WORD16 *pi2_dc_out, + UWORD8 u1_use_upsampled_res) +{ + const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + WORD32 tmp_dc, u4_zero_coeff, u4_nonzero_coeff = 0; + WORD32 mask0, mask1; + __m128i sum0, sum1, sum2, cmp0, cmp1; + __m128i rnd_fact = _mm_set1_epi32(u4_round_factor); + __m128i temp_2 = _mm_set1_epi16(2); + __m128i temp_1 = _mm_set1_epi16(1); + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i temp0, temp1, temp2, temp3; + /* all bits reset to zero */ + __m128i zero_8x16b = 
_mm_setzero_si128(); + __m128i sign_reg0, sign_reg2; + __m128i scalemat_r0_r1, scalemat_r2_r3; + __m128i threshold_r0_r1, threshold_r2_r3; + __m128i threshold_mask_r0_r1, threshold_mask_r2_r3; + + UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + WORD16 *pi2_out = (WORD16 *) ps_out->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_out->i4_data_stride; + + ASSERT(0 == u1_use_upsampled_res); + ASSERT(4 == i4_out_stride); + UNUSED(u1_use_upsampled_res); + UNUSED(i4_out_stride); + UNUSED(ps_upsampled_res); + + /* b00 b01 b02 b03 b10 b11 b12 b13 + -- the scaling matrix 0th,1st row */ + scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix)); + + /* b20 b21 b22 b23 b30 b31 b32 b33 + -- the scaling matrix 2nd,3rd row */ + scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix + 8)); + + /* b00 b01 b02 b03 b10 b11 b12 b13 + -- the treshold matrix 0th,1st row */ + threshold_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_threshold_matrix)); + + /* b20 b21 b22 b23 b30 b31 b32 b33 + -- the threshold matrix 2nd,3rd row */ + threshold_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_threshold_matrix + 8)); + + /* a00 a01 a02 a03 0 0 0 0 0 + 0 0 0 -- all 8 bits */ + src_r0 = _mm_loadl_epi64((__m128i *) (&pu1_src[0])); + + /* a10 a11 a12 a13 0 0 0 0 0 0 0 + 0 -- all 8 bits */ + src_r1 = _mm_loadl_epi64((__m128i *) (&pu1_src[i4_src_stride])); + + /* a20 a21 a22 a23 0 0 0 0 0 0 0 + 0 -- all 8 bits */ + src_r2 = _mm_loadl_epi64((__m128i *) (&pu1_src[2 * i4_src_stride])); + + /* a30 a31 a32 a33 0 0 0 0 0 0 0 + 0 -- all 8 bits */ + src_r3 = _mm_loadl_epi64((__m128i *) (&pu1_src[3 * i4_src_stride])); + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r1 = _mm_cvtepu8_epi16(src_r1); + src_r2 = _mm_cvtepu8_epi16(src_r2); + src_r3 = _mm_cvtepu8_epi16(src_r3); + + /* p00 p01 p02 p03 0 0 0 0 0 + 0 0 0 -- all 8 bits */ + pred_r0 = _mm_loadl_epi64((__m128i *) 
(&pu1_pred[0])); + + /* p10 p11 p12 p13 0 0 0 0 0 + 0 0 0 -- all 8 bits */ + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + + /* p20 p21 p22 p23 0 0 0 0 0 + 0 0 0 -- all 8 bits */ + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + + /* p30 p31 p32 p33 0 0 0 0 0 + 0 0 0 -- all 8 bits */ + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + pred_r2 = _mm_cvtepu8_epi16(pred_r2); + pred_r3 = _mm_cvtepu8_epi16(pred_r3); + + src_r0 = _mm_sub_epi16(src_r0, pred_r0); + src_r1 = _mm_sub_epi16(src_r1, pred_r1); + src_r2 = _mm_sub_epi16(src_r2, pred_r2); + src_r3 = _mm_sub_epi16(src_r3, pred_r3); + + /* Perform Forward transform */ + /*-------------------------------------------------------------*/ + /* DCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + /* a0 b0 a1 b1 a2 b2 a3 b3 */ + temp0 = _mm_unpacklo_epi16(src_r0, src_r1); + /* c0 d0 c1 d1 c2 d2 c3 d3 */ + temp2 = _mm_unpacklo_epi16(src_r2, src_r3); + /* a0 b0 c0 d0 a1 b1 c1 d1 */ + temp1 = _mm_unpacklo_epi32(temp0, temp2); + /* a2 b2 c2 d2 a3 b3 c3 d3 */ + temp3 = _mm_unpackhi_epi32(temp0, temp2); + + /* a0 b0 c0 d0 */ + src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b); + /* a1 b1 c1 d1 */ + src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b); + /* a2 b2 c2 d2 */ + src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b); + /* a3 b3 c3 d3 */ + src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b); + + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + temp0 = _mm_add_epi16(src_r0, src_r3); + /* x1 = z1 + z2 */ + temp1 = _mm_add_epi16(src_r1, src_r2); + /* x2 = z1 - z2 */ + temp2 = _mm_sub_epi16(src_r1, src_r2); + /* x3 = z0 - z3 */ + temp3 = _mm_sub_epi16(src_r0, src_r3); + + /* z0 = x0 + x1 */ + src_r0 = 
_mm_add_epi16(temp0, temp1); + /* z1 = (x3 << 1) + x2 */ + src_r1 = _mm_slli_epi16(temp3, 1); + src_r1 = _mm_add_epi16(src_r1, temp2); + /* z2 = x0 - x1 */ + src_r2 = _mm_sub_epi16(temp0, temp1); + /* z3 = x3 - (x2 << 1) */ + src_r3 = _mm_slli_epi16(temp2, 1); + src_r3 = _mm_sub_epi16(temp3, src_r3); + + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + /* a0 a1 b0 b1 c0 c1 d0 d1 */ + temp0 = _mm_unpacklo_epi16(src_r0, src_r1); + /* a2 a3 b2 b3 c2 c3 d2 d3 */ + temp2 = _mm_unpacklo_epi16(src_r2, src_r3); + /* a0 a1 a2 a3 b0 b1 b2 b3 */ + temp1 = _mm_unpacklo_epi32(temp0, temp2); + /* c0 c1 c2 c3 d0 d1 d2 d3 */ + temp3 = _mm_unpackhi_epi32(temp0, temp2); + + /* a0 a1 a2 a3 */ + src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b); + /* b0 b1 b2 b3 */ + src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b); + /* c0 c1 c2 c3 */ + src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b); + /* d0 d1 d2 d3 */ + src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b); + + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + temp0 = _mm_add_epi16(src_r0, src_r3); + /* x1 = z1 + z2 */ + temp1 = _mm_add_epi16(src_r1, src_r2); + /* x2 = z1 - z2 */ + temp2 = _mm_sub_epi16(src_r1, src_r2); + /* x3 = z0 - z3 */ + temp3 = _mm_sub_epi16(src_r0, src_r3); + + /* z0 = x0 + x1 */ + src_r0 = _mm_add_epi16(temp0, temp1); + /* z1 = (x3 << 1) + x2 */ + src_r1 = _mm_slli_epi16(temp3, 1); + src_r1 = _mm_add_epi16(src_r1, temp2); + /* z2 = x0 - x1 */ + src_r2 = _mm_sub_epi16(temp0, temp1); + /* z3 = x3 - (x2 << 1) */ + src_r3 = _mm_slli_epi16(temp2, 1); + src_r3 = _mm_sub_epi16(temp3, src_r3); + + /* get the first 16 bits from the register */ + tmp_dc = _mm_extract_epi16(src_r0, 0); + *pi2_dc_out = tmp_dc; + + /* a0 a1 a2 a3 b0 b1 b2 b3 */ + src_r0 = _mm_unpacklo_epi64(src_r0, src_r1); + /* c0 c1 c2 c3 d0 d1 d2 d3 */ + src_r2 = _mm_unpacklo_epi64(src_r2, src_r3); + sign_reg0 = _mm_cmpgt_epi16(zero_8x16b, src_r0); + sign_reg2 = 
_mm_cmpgt_epi16(zero_8x16b, src_r2); + + sign_reg0 = _mm_mullo_epi16(temp_2, sign_reg0); + sign_reg2 = _mm_mullo_epi16(temp_2, sign_reg2); + + sign_reg0 = _mm_add_epi16(temp_1, sign_reg0); + sign_reg2 = _mm_add_epi16(temp_1, sign_reg2); + + src_r0 = _mm_abs_epi16(src_r0); + src_r2 = _mm_abs_epi16(src_r2); + + threshold_mask_r0_r1 = _mm_cmpgt_epi16(threshold_r0_r1, src_r0); + threshold_mask_r2_r3 = _mm_cmpgt_epi16(threshold_r2_r3, src_r2); + + src_r1 = _mm_srli_si128(src_r0, 8); + src_r0 = _mm_cvtepu16_epi32(src_r0); + src_r1 = _mm_cvtepu16_epi32(src_r1); + src_r3 = _mm_srli_si128(src_r2, 8); + src_r2 = _mm_cvtepu16_epi32(src_r2); + src_r3 = _mm_cvtepu16_epi32(src_r3); + + temp0 = _mm_cvtepu16_epi32(scalemat_r0_r1); + scalemat_r0_r1 = _mm_srli_si128(scalemat_r0_r1, 8); + temp2 = _mm_cvtepu16_epi32(scalemat_r2_r3); + scalemat_r2_r3 = _mm_srli_si128(scalemat_r2_r3, 8); + temp1 = _mm_cvtepu16_epi32(scalemat_r0_r1); + temp3 = _mm_cvtepu16_epi32(scalemat_r2_r3); + + temp0 = _mm_mullo_epi32(temp0, src_r0); + temp1 = _mm_mullo_epi32(temp1, src_r1); + temp2 = _mm_mullo_epi32(temp2, src_r2); + temp3 = _mm_mullo_epi32(temp3, src_r3); + + temp0 = _mm_add_epi32(temp0, rnd_fact); + temp1 = _mm_add_epi32(temp1, rnd_fact); + temp2 = _mm_add_epi32(temp2, rnd_fact); + temp3 = _mm_add_epi32(temp3, rnd_fact); + + temp0 = _mm_srli_epi32(temp0, u4_qbits); + temp1 = _mm_srli_epi32(temp1, u4_qbits); + temp2 = _mm_srli_epi32(temp2, u4_qbits); + temp3 = _mm_srli_epi32(temp3, u4_qbits); + + temp0 = _mm_packs_epi32(temp0, temp1); + temp2 = _mm_packs_epi32(temp2, temp3); + + temp0 = _mm_sign_epi16(temp0, sign_reg0); + temp2 = _mm_sign_epi16(temp2, sign_reg2); + + temp0 = _mm_andnot_si128(threshold_mask_r0_r1, temp0); + temp2 = _mm_andnot_si128(threshold_mask_r2_r3, temp2); + + _mm_storeu_si128((__m128i *) (&pi2_out[0]), temp0); + _mm_storeu_si128((__m128i *) (&pi2_out[8]), temp2); + + cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b); + cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b); + + mask0 = 
_mm_movemask_epi8(cmp0); + mask1 = _mm_movemask_epi8(cmp1); + u4_zero_coeff = 0; + + if(mask0) + { + if(mask0 == 0xffff) + u4_zero_coeff += 8; + else + { + cmp0 = _mm_and_si128(temp_1, cmp0); + sum0 = _mm_hadd_epi16(cmp0, zero_8x16b); + sum1 = _mm_hadd_epi16(sum0, zero_8x16b); + sum2 = _mm_hadd_epi16(sum1, zero_8x16b); + u4_zero_coeff += _mm_cvtsi128_si32(sum2); + } + } + if(mask1) + { + if(mask1 == 0xffff) + u4_zero_coeff += 8; + else + { + cmp1 = _mm_and_si128(temp_1, cmp1); + sum0 = _mm_hadd_epi16(cmp1, zero_8x16b); + sum1 = _mm_hadd_epi16(sum0, zero_8x16b); + sum2 = _mm_hadd_epi16(sum1, zero_8x16b); + u4_zero_coeff += _mm_cvtsi128_si32(sum2); + } + } + + /* Return total nonzero coefficients in the current sub block */ + u4_nonzero_coeff = 16 - u4_zero_coeff; + *pu1_nnz = u4_nonzero_coeff; +} + +void isvc_resi_trans_quant_4x4_with_res_pred_sse42( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_out, + buffer_container_t *ps_upsampled_res, resi_trans_quant_constants_t *ps_quant_constants, + UWORD8 *pu1_nnz, WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res) +{ + const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix; + const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix; + UWORD32 u4_qbits = ps_quant_constants->u4_qbits; + UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor; + WORD32 tmp_dc, u4_zero_coeff, u4_nonzero_coeff = 0; + WORD32 mask0, mask1; + __m128i sum0, sum1, sum2, cmp0, cmp1; + __m128i rnd_fact = _mm_set1_epi32(u4_round_factor); + __m128i temp_2 = _mm_set1_epi16(2); + __m128i temp_1 = _mm_set1_epi16(1); + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i temp0, temp1, temp2, temp3; + /* all bits reset to zero */ + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX)); + __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX)); + __m128i sign_reg0, sign_reg2; + 
__m128i scalemat_r0_r1, scalemat_r2_r3; + __m128i upsampled_res0, upsampled_res1, upsampled_res2, upsampled_res3; + __m128i threshold_r0_r1, threshold_r2_r3; + __m128i threshold_mask_r0_r1, threshold_mask_r2_r3; + + UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + WORD16 *pi2_out = (WORD16 *) ps_out->pv_data; + WORD16 *pi2_upsampled_res = ps_upsampled_res ? (WORD16 *) ps_upsampled_res->pv_data : NULL; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_out_stride = ps_out->i4_data_stride; + WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0; + + ASSERT(1 == u1_use_upsampled_res); + ASSERT(4 == i4_out_stride); + UNUSED(u1_use_upsampled_res); + UNUSED(i4_out_stride); + UNUSED(ps_upsampled_res); + + /* b00 b01 b02 b03 b10 b11 b12 b13 + -- the scaling matrix 0th,1st row */ + scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix)); + + /* b20 b21 b22 b23 b30 b31 b32 b33 + -- the scaling matrix 2nd,3rd row */ + scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix + 8)); + + /* b00 b01 b02 b03 b10 b11 b12 b13 + -- the treshold matrix 0th,1st row */ + threshold_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_threshold_matrix)); + + /* b20 b21 b22 b23 b30 b31 b32 b33 + -- the threshold matrix 2nd,3rd row */ + threshold_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_threshold_matrix + 8)); + + /* a00 a01 a02 a03 0 0 0 0 0 + 0 0 0 -- all 8 bits */ + src_r0 = _mm_loadl_epi64((__m128i *) (&pu1_src[0])); + + /* a10 a11 a12 a13 0 0 0 0 0 0 0 + 0 -- all 8 bits */ + src_r1 = _mm_loadl_epi64((__m128i *) (&pu1_src[i4_src_stride])); + + /* a20 a21 a22 a23 0 0 0 0 0 0 0 + 0 -- all 8 bits */ + src_r2 = _mm_loadl_epi64((__m128i *) (&pu1_src[2 * i4_src_stride])); + + /* a30 a31 a32 a33 0 0 0 0 0 0 0 + 0 -- all 8 bits */ + src_r3 = _mm_loadl_epi64((__m128i *) (&pu1_src[3 * i4_src_stride])); + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r1 
= _mm_cvtepu8_epi16(src_r1); + src_r2 = _mm_cvtepu8_epi16(src_r2); + src_r3 = _mm_cvtepu8_epi16(src_r3); + + /* p00 p01 p02 p03 0 0 0 0 0 + 0 0 0 -- all 8 bits */ + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); + + /* p10 p11 p12 p13 0 0 0 0 0 + 0 0 0 -- all 8 bits */ + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride])); + + /* p20 p21 p22 p23 0 0 0 0 0 + 0 0 0 -- all 8 bits */ + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride])); + + /* p30 p31 p32 p33 0 0 0 0 0 + 0 0 0 -- all 8 bits */ + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride])); + + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + pred_r2 = _mm_cvtepu8_epi16(pred_r2); + pred_r3 = _mm_cvtepu8_epi16(pred_r3); + + src_r0 = _mm_sub_epi16(src_r0, pred_r0); + src_r1 = _mm_sub_epi16(src_r1, pred_r1); + src_r2 = _mm_sub_epi16(src_r2, pred_r2); + src_r3 = _mm_sub_epi16(src_r3, pred_r3); + + /* load upsampled residual values and subtract from + the previous residue */ + upsampled_res0 = _mm_loadu_si128((__m128i *) (&pi2_upsampled_res[0])); + + upsampled_res1 = _mm_loadu_si128((__m128i *) (&pi2_upsampled_res[i4_upsampled_res_stride])); + + upsampled_res2 = _mm_loadu_si128((__m128i *) (&pi2_upsampled_res[2 * i4_upsampled_res_stride])); + + upsampled_res3 = _mm_loadu_si128((__m128i *) (&pi2_upsampled_res[3 * i4_upsampled_res_stride])); + + src_r0 = _mm_sub_epi16(src_r0, upsampled_res0); + src_r1 = _mm_sub_epi16(src_r1, upsampled_res1); + src_r2 = _mm_sub_epi16(src_r2, upsampled_res2); + src_r3 = _mm_sub_epi16(src_r3, upsampled_res3); + + src_r1 = _mm_unpacklo_epi16(src_r0, src_r1); + src_r3 = _mm_unpacklo_epi16(src_r2, src_r3); + + /* Saturate all values < -255 to -255 and retain the rest as it is */ + src_r1 = _mm_max_epi16(src_r1, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp0 = _mm_min_epi16(src_r1, pos_255_8x16b); + + /* Saturate all values < -255 to -255 and retain the 
rest as it is */ + src_r3 = _mm_max_epi16(src_r3, neg_255_8x16b); + /* Saturate all values > 255 to 255 and retain the rest as it is */ + temp2 = _mm_min_epi16(src_r3, pos_255_8x16b); + + /* Perform Forward transform */ + /*-------------------------------------------------------------*/ + /* DCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + /* a0 b0 c0 d0 a1 b1 c1 d1 */ + temp1 = _mm_unpacklo_epi32(temp0, temp2); + /* a2 b2 c2 d2 a3 b3 c3 d3 */ + temp3 = _mm_unpackhi_epi32(temp0, temp2); + + /* a0 b0 c0 d0 */ + src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b); + /* a1 b1 c1 d1 */ + src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b); + /* a2 b2 c2 d2 */ + src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b); + /* a3 b3 c3 d3 */ + src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b); + + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + temp0 = _mm_add_epi16(src_r0, src_r3); + /* x1 = z1 + z2 */ + temp1 = _mm_add_epi16(src_r1, src_r2); + /* x2 = z1 - z2 */ + temp2 = _mm_sub_epi16(src_r1, src_r2); + /* x3 = z0 - z3 */ + temp3 = _mm_sub_epi16(src_r0, src_r3); + + /* z0 = x0 + x1 */ + src_r0 = _mm_add_epi16(temp0, temp1); + /* z1 = (x3 << 1) + x2 */ + src_r1 = _mm_slli_epi16(temp3, 1); + src_r1 = _mm_add_epi16(src_r1, temp2); + /* z2 = x0 - x1 */ + src_r2 = _mm_sub_epi16(temp0, temp1); + /* z3 = x3 - (x2 << 1) */ + src_r3 = _mm_slli_epi16(temp2, 1); + src_r3 = _mm_sub_epi16(temp3, src_r3); + + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + /* a0 a1 b0 b1 c0 c1 d0 d1 */ + temp0 = _mm_unpacklo_epi16(src_r0, src_r1); + /* a2 a3 b2 b3 c2 c3 d2 d3 */ + temp2 = _mm_unpacklo_epi16(src_r2, src_r3); + /* a0 a1 a2 a3 b0 b1 b2 b3 */ + temp1 = _mm_unpacklo_epi32(temp0, temp2); + /* c0 c1 c2 c3 d0 d1 d2 d3 */ + temp3 = _mm_unpackhi_epi32(temp0, temp2); + + /* a0 a1 
a2 a3 */ + src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b); + /* b0 b1 b2 b3 */ + src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b); + /* c0 c1 c2 c3 */ + src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b); + /* d0 d1 d2 d3 */ + src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b); + + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + temp0 = _mm_add_epi16(src_r0, src_r3); + /* x1 = z1 + z2 */ + temp1 = _mm_add_epi16(src_r1, src_r2); + /* x2 = z1 - z2 */ + temp2 = _mm_sub_epi16(src_r1, src_r2); + /* x3 = z0 - z3 */ + temp3 = _mm_sub_epi16(src_r0, src_r3); + + /* z0 = x0 + x1 */ + src_r0 = _mm_add_epi16(temp0, temp1); + /* z1 = (x3 << 1) + x2 */ + src_r1 = _mm_slli_epi16(temp3, 1); + src_r1 = _mm_add_epi16(src_r1, temp2); + /* z2 = x0 - x1 */ + src_r2 = _mm_sub_epi16(temp0, temp1); + /* z3 = x3 - (x2 << 1) */ + src_r3 = _mm_slli_epi16(temp2, 1); + src_r3 = _mm_sub_epi16(temp3, src_r3); + + /* get the first 16 bits from the register */ + tmp_dc = _mm_extract_epi16(src_r0, 0); + *pi2_dc_out = tmp_dc; + + /* a0 a1 a2 a3 b0 b1 b2 b3 */ + src_r0 = _mm_unpacklo_epi64(src_r0, src_r1); + /* c0 c1 c2 c3 d0 d1 d2 d3 */ + src_r2 = _mm_unpacklo_epi64(src_r2, src_r3); + sign_reg0 = _mm_cmpgt_epi16(zero_8x16b, src_r0); + sign_reg2 = _mm_cmpgt_epi16(zero_8x16b, src_r2); + + sign_reg0 = _mm_mullo_epi16(temp_2, sign_reg0); + sign_reg2 = _mm_mullo_epi16(temp_2, sign_reg2); + + sign_reg0 = _mm_add_epi16(temp_1, sign_reg0); + sign_reg2 = _mm_add_epi16(temp_1, sign_reg2); + + src_r0 = _mm_abs_epi16(src_r0); + src_r2 = _mm_abs_epi16(src_r2); + + threshold_mask_r0_r1 = _mm_cmpgt_epi16(threshold_r0_r1, src_r0); + threshold_mask_r2_r3 = _mm_cmpgt_epi16(threshold_r2_r3, src_r2); + + src_r1 = _mm_srli_si128(src_r0, 8); + src_r0 = _mm_cvtepu16_epi32(src_r0); + src_r1 = _mm_cvtepu16_epi32(src_r1); + src_r3 = _mm_srli_si128(src_r2, 8); + src_r2 = _mm_cvtepu16_epi32(src_r2); + src_r3 = _mm_cvtepu16_epi32(src_r3); + + temp0 = _mm_cvtepu16_epi32(scalemat_r0_r1); + 
scalemat_r0_r1 = _mm_srli_si128(scalemat_r0_r1, 8); + temp2 = _mm_cvtepu16_epi32(scalemat_r2_r3); + scalemat_r2_r3 = _mm_srli_si128(scalemat_r2_r3, 8); + temp1 = _mm_cvtepu16_epi32(scalemat_r0_r1); + temp3 = _mm_cvtepu16_epi32(scalemat_r2_r3); + + temp0 = _mm_mullo_epi32(temp0, src_r0); + temp1 = _mm_mullo_epi32(temp1, src_r1); + temp2 = _mm_mullo_epi32(temp2, src_r2); + temp3 = _mm_mullo_epi32(temp3, src_r3); + + temp0 = _mm_add_epi32(temp0, rnd_fact); + temp1 = _mm_add_epi32(temp1, rnd_fact); + temp2 = _mm_add_epi32(temp2, rnd_fact); + temp3 = _mm_add_epi32(temp3, rnd_fact); + + temp0 = _mm_srli_epi32(temp0, u4_qbits); + temp1 = _mm_srli_epi32(temp1, u4_qbits); + temp2 = _mm_srli_epi32(temp2, u4_qbits); + temp3 = _mm_srli_epi32(temp3, u4_qbits); + + temp0 = _mm_packs_epi32(temp0, temp1); + temp2 = _mm_packs_epi32(temp2, temp3); + + temp0 = _mm_sign_epi16(temp0, sign_reg0); + temp2 = _mm_sign_epi16(temp2, sign_reg2); + + temp0 = _mm_andnot_si128(threshold_mask_r0_r1, temp0); + temp2 = _mm_andnot_si128(threshold_mask_r2_r3, temp2); + + _mm_storeu_si128((__m128i *) (&pi2_out[0]), temp0); + _mm_storeu_si128((__m128i *) (&pi2_out[8]), temp2); + + cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b); + cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b); + + mask0 = _mm_movemask_epi8(cmp0); + mask1 = _mm_movemask_epi8(cmp1); + u4_zero_coeff = 0; + if(mask0) + { + if(mask0 == 0xffff) + u4_zero_coeff += 8; + else + { + cmp0 = _mm_and_si128(temp_1, cmp0); + sum0 = _mm_hadd_epi16(cmp0, zero_8x16b); + sum1 = _mm_hadd_epi16(sum0, zero_8x16b); + sum2 = _mm_hadd_epi16(sum1, zero_8x16b); + u4_zero_coeff += _mm_cvtsi128_si32(sum2); + } + } + if(mask1) + { + if(mask1 == 0xffff) + u4_zero_coeff += 8; + else + { + cmp1 = _mm_and_si128(temp_1, cmp1); + sum0 = _mm_hadd_epi16(cmp1, zero_8x16b); + sum1 = _mm_hadd_epi16(sum0, zero_8x16b); + sum2 = _mm_hadd_epi16(sum1, zero_8x16b); + u4_zero_coeff += _mm_cvtsi128_si32(sum2); + } + } + + /* Return total nonzero coefficients in the current sub block */ + 
u4_nonzero_coeff = 16 - u4_zero_coeff;
    *pu1_nnz = u4_nonzero_coeff;
}

/**
 *******************************************************************************
 *
 * @brief
 *  Computes the residue of a 4x4 chroma block, applies the forward 4x4
 *  transform to it and quantizes the result
 *
 * @par Description:
 *  Four rows of 8 bytes are read from the source and from the prediction.
 *  Only the low byte of every 16-bit lane is kept (bytes 0, 2, 4 and 6 of
 *  each row), which gives 4 samples per row. The prediction is subtracted
 *  from the source, the 4x4 residue is transformed horizontally and then
 *  vertically, and every coefficient is quantized as
 *      sign * ((|coeff| * scale + round_factor) >> qbits)
 *  Coefficients whose magnitude is below the matching threshold matrix entry
 *  are forced to zero. The 16 quantized coefficients are stored contiguously.
 *
 * @param[in] ps_src
 *  Source block; pv_data is read as UWORD8 and i4_data_stride is the row
 *  stride
 *
 * @param[in] ps_pred
 *  Prediction block; pv_data is read as UWORD8 and i4_data_stride is the row
 *  stride
 *
 * @param[out] ps_out
 *  Output block; 16 WORD16 coefficients are written to pv_data.
 *  i4_data_stride is asserted to be 4
 *
 * @param[in] ps_upsampled_res
 *  Not used by this variant
 *
 * @param[in] ps_quant_constants
 *  Supplies pu2_scale_matrix, pu2_threshold_matrix, u4_qbits and
 *  u4_round_factor
 *
 * @param[out] pu1_nnz
 *  Number of non-zero quantized coefficients in the block
 *
 * @param[out] pi2_dc_out
 *  Coefficient (0, 0) of the transform, before quantization
 *
 * @param[in] u1_use_upsampled_res
 *  Asserted to be 0 for this variant
 *
 * @returns
 *  None
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void isvc_resi_trans_quant_chroma_4x4_sse42(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                                            buffer_container_t *ps_out,
                                            buffer_container_t *ps_upsampled_res,
                                            resi_trans_quant_constants_t *ps_quant_constants,
                                            UWORD8 *pu1_nnz, WORD16 *pi2_dc_out,
                                            UWORD8 u1_use_upsampled_res)
{
    UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
    WORD16 *pi2_out = (WORD16 *) ps_out->pv_data;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_out_stride = ps_out->i4_data_stride;
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
    WORD32 tmp_dc, u4_zero_coeff, u4_nonzero_coeff = 0;
    WORD32 mask0, mask1;
    __m128i cmp0, cmp1, sum0, sum1, sum2;
    __m128i rnd_fact = _mm_set1_epi32(u4_round_factor);
    __m128i temp_2 = _mm_set1_epi16(2);
    __m128i temp_1 = _mm_set1_epi16(1);
    __m128i src_r0, src_r1, src_r2, src_r3;
    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
    __m128i temp0, temp1, temp2, temp3;
    /* all bits reset to zero */
    __m128i zero_8x16b = _mm_setzero_si128();
    __m128i sign_reg0, sign_reg2;
    __m128i scalemat_r0_r1, scalemat_r2_r3;
    __m128i threshold_r0_r1, threshold_r2_r3;
    __m128i threshold_mask_r0_r1, threshold_mask_r2_r3;
    /* 0x00FF in every 16-bit lane: keeps the low byte of each byte pair */
    __m128i chroma_mask = _mm_set1_epi16(0xFF);

    ASSERT(0 == u1_use_upsampled_res);
    /* The two 128-bit stores below write the 16 coefficients back to back */
    ASSERT(4 == i4_out_stride);
    UNUSED(u1_use_upsampled_res);
    UNUSED(i4_out_stride);
    UNUSED(ps_upsampled_res);

    /* b00 b01 b02 b03 b10 b11 b12 b13
     -- the scaling matrix 0th,1st row */
    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix));

    /* b20 b21 b22 b23 b30 b31 b32 b33
     -- the scaling matrix 2nd,3rd row */
    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix + 8));

    /* b00 b01 b02 b03 b10 b11 b12 b13
     -- the threshold matrix 0th,1st row */
    threshold_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_threshold_matrix));

    /* b20 b21 b22 b23 b30 b31 b32 b33
     -- the threshold matrix 2nd,3rd row */
    threshold_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_threshold_matrix + 8));

    /* Load 8 bytes from each of the 4 source rows into the low half of a register */
    src_r0 = _mm_loadl_epi64((__m128i *) (&pu1_src[0]));
    src_r1 = _mm_loadl_epi64((__m128i *) (&pu1_src[i4_src_stride]));
    src_r2 = _mm_loadl_epi64((__m128i *) (&pu1_src[2 * i4_src_stride]));
    src_r3 = _mm_loadl_epi64((__m128i *) (&pu1_src[3 * i4_src_stride]));

    /* Keep bytes 0, 2, 4, 6 of each row as four zero-extended 16-bit samples */
    /* NOTE(review): presumably this selects one plane of interleaved chroma */
    /* samples - confirm against the caller */
    src_r0 = _mm_and_si128(src_r0, chroma_mask);
    src_r1 = _mm_and_si128(src_r1, chroma_mask);
    src_r2 = _mm_and_si128(src_r2, chroma_mask);
    src_r3 = _mm_and_si128(src_r3, chroma_mask);

    /* Load 8 bytes from each of the 4 prediction rows */
    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0]));
    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride]));
    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride]));
    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride]));

    /* Same byte selection as for the source rows */
    pred_r0 = _mm_and_si128(pred_r0, chroma_mask);
    pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
    pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
    pred_r3 = _mm_and_si128(pred_r3, chroma_mask);

    /* Residue = source - prediction, 16 bits per sample */
    src_r0 = _mm_sub_epi16(src_r0, pred_r0);
    src_r1 = _mm_sub_epi16(src_r1, pred_r1);
    src_r2 = _mm_sub_epi16(src_r2, pred_r2);
    src_r3 = _mm_sub_epi16(src_r3, pred_r3);

    /* Perform Forward transform */
    /*-------------------------------------------------------------*/
    /* DCT [ Horizontal transformation ]                          */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 a1 a2 a3
     *  b0 b1 b2 b3
     *  c0 c1 c2 c3
     *  d0 d1 d2 d3
     */
    /* a0 b0 a1 b1 a2 b2 a3 b3 */
    temp0 = _mm_unpacklo_epi16(src_r0, src_r1);
    /* c0 d0 c1 d1 c2 d2 c3 d3 */
    temp2 = _mm_unpacklo_epi16(src_r2, src_r3);
    /* a0 b0 c0 d0 a1 b1 c1 d1 */
    temp1 = _mm_unpacklo_epi32(temp0, temp2);
    /* a2 b2 c2 d2 a3 b3 c3 d3 */
    temp3 = _mm_unpackhi_epi32(temp0, temp2);

    /* a0 b0 c0 d0 */
    src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b);
    /* a1 b1 c1 d1 */
    src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b);
    /* a2 b2 c2 d2 */
    src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b);
    /* a3 b3 c3 d3 */
    src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b);

    /*----------------------------------------------------------*/
    /* x0 = z0 + z3 */
    temp0 = _mm_add_epi16(src_r0, src_r3);
    /* x1 = z1 + z2 */
    temp1 = _mm_add_epi16(src_r1, src_r2);
    /* x2 = z1 - z2 */
    temp2 = _mm_sub_epi16(src_r1, src_r2);
    /* x3 = z0 - z3 */
    temp3 = _mm_sub_epi16(src_r0, src_r3);

    /* z0 = x0 + x1 */
    src_r0 = _mm_add_epi16(temp0, temp1);
    /* z1 = (x3 << 1) + x2 */
    src_r1 = _mm_slli_epi16(temp3, 1);
    src_r1 = _mm_add_epi16(src_r1, temp2);
    /* z2 = x0 - x1 */
    src_r2 = _mm_sub_epi16(temp0, temp1);
    /* z3 = x3 - (x2 << 1) */
    src_r3 = _mm_slli_epi16(temp2, 1);
    src_r3 = _mm_sub_epi16(temp3, src_r3);

    /*-------------------------------------------------------------*/
    /* DCT [ Vertical transformation ]                            */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 b0 c0 d0
     *  a1 b1 c1 d1
     *  a2 b2 c2 d2
     *  a3 b3 c3 d3
     */
    /* a0 a1 b0 b1 c0 c1 d0 d1 */
    temp0 = _mm_unpacklo_epi16(src_r0, src_r1);
    /* a2 a3 b2 b3 c2 c3 d2 d3 */
    temp2 = _mm_unpacklo_epi16(src_r2, src_r3);
    /* a0 a1 a2 a3 b0 b1 b2 b3 */
    temp1 = _mm_unpacklo_epi32(temp0, temp2);
    /* c0 c1 c2 c3 d0 d1 d2 d3 */
    temp3 = _mm_unpackhi_epi32(temp0, temp2);

    /* a0 a1 a2 a3 */
    src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b);
    /* b0 b1 b2 b3 */
    src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b);
    /* c0 c1 c2 c3 */
    src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b);
    /* d0 d1 d2 d3 */
    src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b);

    /*----------------------------------------------------------*/
    /* x0 = z0 + z3 */
    temp0 = _mm_add_epi16(src_r0, src_r3);
    /* x1 = z1 + z2 */
    temp1 = _mm_add_epi16(src_r1, src_r2);
    /* x2 = z1 - z2 */
    temp2 = _mm_sub_epi16(src_r1, src_r2);
    /* x3 = z0 - z3 */
    temp3 = _mm_sub_epi16(src_r0, src_r3);

    /* z0 = x0 + x1 */
    src_r0 = _mm_add_epi16(temp0, temp1);
    /* z1 = (x3 << 1) + x2 */
    src_r1 = _mm_slli_epi16(temp3, 1);
    src_r1 = _mm_add_epi16(src_r1, temp2);
    /* z2 = x0 - x1 */
    src_r2 = _mm_sub_epi16(temp0, temp1);
    /* z3 = x3 - (x2 << 1) */
    src_r3 = _mm_slli_epi16(temp2, 1);
    src_r3 = _mm_sub_epi16(temp3, src_r3);

    /* Lane 0 of row 0 is the DC coefficient; it is reported before quantization */
    tmp_dc = _mm_extract_epi16(src_r0, 0);
    *pi2_dc_out = tmp_dc;

    /* a0 a1 a2 a3 b0 b1 b2 b3 */
    src_r0 = _mm_unpacklo_epi64(src_r0, src_r1);
    /* c0 c1 c2 c3 d0 d1 d2 d3 */
    src_r2 = _mm_unpacklo_epi64(src_r2, src_r3);
    /* -1 in every lane holding a negative coefficient, 0 elsewhere */
    sign_reg0 = _mm_cmpgt_epi16(zero_8x16b, src_r0);
    sign_reg2 = _mm_cmpgt_epi16(zero_8x16b, src_r2);

    /* -2 for negative lanes, 0 otherwise */
    sign_reg0 = _mm_mullo_epi16(temp_2, sign_reg0);
    sign_reg2 = _mm_mullo_epi16(temp_2, sign_reg2);

    /* -1 for negative lanes, +1 otherwise; consumed by _mm_sign_epi16 below */
    sign_reg0 = _mm_add_epi16(temp_1, sign_reg0);
    sign_reg2 = _mm_add_epi16(temp_1, sign_reg2);

    /* Quantization works on magnitudes */
    src_r0 = _mm_abs_epi16(src_r0);
    src_r2 = _mm_abs_epi16(src_r2);

    /* All ones in lanes where threshold > |coeff|; those lanes are zeroed after quantization */
    threshold_mask_r0_r1 = _mm_cmpgt_epi16(threshold_r0_r1, src_r0);
    threshold_mask_r2_r3 = _mm_cmpgt_epi16(threshold_r2_r3, src_r2);

    /* Widen the 16 magnitudes to 32 bits, one row per register */
    src_r1 = _mm_srli_si128(src_r0, 8);
    src_r0 = _mm_cvtepu16_epi32(src_r0);
    src_r1 = _mm_cvtepu16_epi32(src_r1);
    src_r3 = _mm_srli_si128(src_r2, 8);
    src_r2 = _mm_cvtepu16_epi32(src_r2);
    src_r3 = _mm_cvtepu16_epi32(src_r3);

    /* Widen the scale matrix the same way, one row per register */
    temp0 = _mm_cvtepu16_epi32(scalemat_r0_r1);
    scalemat_r0_r1 = _mm_srli_si128(scalemat_r0_r1, 8);
    temp2 = _mm_cvtepu16_epi32(scalemat_r2_r3);
    scalemat_r2_r3 = _mm_srli_si128(scalemat_r2_r3, 8);
    temp1 = _mm_cvtepu16_epi32(scalemat_r0_r1);
    temp3 = _mm_cvtepu16_epi32(scalemat_r2_r3);

    /* |coeff| * scale */
    temp0 = _mm_mullo_epi32(temp0, src_r0);
    temp1 = _mm_mullo_epi32(temp1, src_r1);
    temp2 = _mm_mullo_epi32(temp2, src_r2);
    temp3 = _mm_mullo_epi32(temp3, src_r3);

    /* + round factor */
    temp0 = _mm_add_epi32(temp0, rnd_fact);
    temp1 = _mm_add_epi32(temp1, rnd_fact);
    temp2 = _mm_add_epi32(temp2, rnd_fact);
    temp3 = _mm_add_epi32(temp3, rnd_fact);

    /* >> qbits */
    temp0 = _mm_srli_epi32(temp0, u4_qbits);
    temp1 = _mm_srli_epi32(temp1, u4_qbits);
    temp2 = _mm_srli_epi32(temp2, u4_qbits);
    temp3 = _mm_srli_epi32(temp3, u4_qbits);

    /* Back to 16 bits with signed saturation: rows 0,1 in temp0 and rows 2,3 in temp2 */
    temp0 = _mm_packs_epi32(temp0, temp1);
    temp2 = _mm_packs_epi32(temp2, temp3);

    /* Restore the sign of every coefficient */
    temp0 = _mm_sign_epi16(temp0, sign_reg0);
    temp2 = _mm_sign_epi16(temp2, sign_reg2);

    /* Zero the lanes that were below the threshold */
    temp0 = _mm_andnot_si128(threshold_mask_r0_r1, temp0);
    temp2 = _mm_andnot_si128(threshold_mask_r2_r3, temp2);

    _mm_storeu_si128((__m128i *) (&pi2_out[0]), temp0);
    _mm_storeu_si128((__m128i *) (&pi2_out[8]), temp2);

    /* Count the zero coefficients: all ones in every lane that quantized to zero */
    cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b);
    cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b);

    mask0 = _mm_movemask_epi8(cmp0);
    mask1 = _mm_movemask_epi8(cmp1);
    u4_zero_coeff = 0;
    if(mask0)
    {
        if(mask0 == 0xffff)
            u4_zero_coeff += 8;
        else
        {
            /* Turn each all-ones lane into 1 and sum the 8 lanes into lane 0 */
            cmp0 = _mm_and_si128(temp_1, cmp0);
            sum0 = _mm_hadd_epi16(cmp0, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }
    if(mask1)
    {
        if(mask1 == 0xffff)
            u4_zero_coeff += 8;
        else
        {
            cmp1 = _mm_and_si128(temp_1, cmp1);
            sum0 = _mm_hadd_epi16(cmp1, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }

    /* Return total nonzero coefficients in the current sub block */
    u4_nonzero_coeff = 16 - u4_zero_coeff;
    *pu1_nnz = u4_nonzero_coeff;
}

/**
 *******************************************************************************
 *
 * @brief
 *  Computes the residue of a 4x4 chroma block, subtracts an upsampled
 *  residual from it, applies the forward 4x4 transform and quantizes the
 *  result
 *
 * @par Description:
 *  Source and prediction rows are read and reduced to 4 samples per row in
 *  the same way as in isvc_resi_trans_quant_chroma_4x4_sse42. After the
 *  prediction is subtracted, four rows of WORD16 values read from the
 *  upsampled residual buffer are subtracted as well and the result is
 *  clamped to [-255, 255]. Transform, quantization, thresholding, storage
 *  and non-zero counting then proceed as in the variant without residual
 *  prediction.
 *
 * @param[in] ps_src
 *  Source block; pv_data is read as UWORD8 and i4_data_stride is the row
 *  stride
 *
 * @param[in] ps_pred
 *  Prediction block; pv_data is read as UWORD8 and i4_data_stride is the row
 *  stride
 *
 * @param[out] ps_out
 *  Output block; 16 WORD16 coefficients are written to pv_data.
 *  i4_data_stride is asserted to be 4
 *
 * @param[in] ps_upsampled_res
 *  Upsampled residual; pv_data is read as WORD16 and i4_data_stride is the
 *  row stride in WORD16 units
 *
 * @param[in] ps_quant_constants
 *  Supplies pu2_scale_matrix, pu2_threshold_matrix, u4_qbits and
 *  u4_round_factor
 *
 * @param[out] pu1_nnz
 *  Number of non-zero quantized coefficients in the block
 *
 * @param[out] pi2_dc_out
 *  Coefficient (0, 0) of the transform, before quantization
 *
 * @param[in] u1_use_upsampled_res
 *  Asserted to be 1 for this variant
 *
 * @returns
 *  None
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void isvc_resi_trans_quant_chroma_4x4_with_res_pred_sse42(
    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_out,
    buffer_container_t *ps_upsampled_res, resi_trans_quant_constants_t *ps_quant_constants,
    UWORD8 *pu1_nnz, WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
{
    UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
    WORD16 *pi2_out = (WORD16 *) ps_out->pv_data;
    /* NOTE(review): a NULL ps_upsampled_res is tolerated here, but the residual */
    /* rows are loaded unconditionally further down - confirm that callers never */
    /* pass NULL to this variant */
    WORD16 *pi2_upsampled_res = ps_upsampled_res ? (WORD16 *) ps_upsampled_res->pv_data : NULL;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_out_stride = ps_out->i4_data_stride;
    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
    WORD32 tmp_dc, u4_zero_coeff, u4_nonzero_coeff = 0;
    WORD32 mask0, mask1;
    __m128i cmp0, cmp1, sum0, sum1, sum2;
    __m128i rnd_fact = _mm_set1_epi32(u4_round_factor);
    __m128i temp_2 = _mm_set1_epi16(2);
    __m128i temp_1 = _mm_set1_epi16(1);
    __m128i src_r0, src_r1, src_r2, src_r3;
    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
    __m128i temp0, temp1, temp2, temp3;
    /* all bits reset to zero */
    __m128i zero_8x16b = _mm_setzero_si128();
    /* Clamp limits applied to the residue after the upsampled residual is subtracted */
    __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX));
    __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX));
    __m128i sign_reg0, sign_reg2;
    __m128i scalemat_r0_r1, scalemat_r2_r3;
    __m128i upsampled_res0, upsampled_res1, upsampled_res2, upsampled_res3;
    __m128i threshold_r0_r1, threshold_r2_r3;
    __m128i threshold_mask_r0_r1, threshold_mask_r2_r3;
    /* 0x00FF in every 16-bit lane: keeps the low byte of each byte pair */
    __m128i chroma_mask = _mm_set1_epi16(0xFF);

    ASSERT(1 == u1_use_upsampled_res);
    /* The two 128-bit stores below write the 16 coefficients back to back */
    ASSERT(4 == i4_out_stride);
    UNUSED(u1_use_upsampled_res);
    UNUSED(i4_out_stride);
    /* NOTE(review): ps_upsampled_res is read above, so this UNUSED() is redundant */
    UNUSED(ps_upsampled_res);

    /* b00 b01 b02 b03 b10 b11 b12 b13
     -- the scaling matrix 0th,1st row */
    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix));

    /* b20 b21 b22 b23 b30 b31 b32 b33
     -- the scaling matrix 2nd,3rd row */
    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix + 8));

    /* b00 b01 b02 b03 b10 b11 b12 b13
     -- the threshold matrix 0th,1st row */
    threshold_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_threshold_matrix));

    /* b20 b21 b22 b23 b30 b31 b32 b33
     -- the threshold matrix 2nd,3rd row */
    threshold_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_threshold_matrix + 8));

    /* Load 8 bytes from each of the 4 source rows into the low half of a register */
    src_r0 = _mm_loadl_epi64((__m128i *) (&pu1_src[0]));
    src_r1 = _mm_loadl_epi64((__m128i *) (&pu1_src[i4_src_stride]));
    src_r2 = _mm_loadl_epi64((__m128i *) (&pu1_src[2 * i4_src_stride]));
    src_r3 = _mm_loadl_epi64((__m128i *) (&pu1_src[3 * i4_src_stride]));

    /* Keep bytes 0, 2, 4, 6 of each row as four zero-extended 16-bit samples */
    src_r0 = _mm_and_si128(src_r0, chroma_mask);
    src_r1 = _mm_and_si128(src_r1, chroma_mask);
    src_r2 = _mm_and_si128(src_r2, chroma_mask);
    src_r3 = _mm_and_si128(src_r3, chroma_mask);

    /* Load 8 bytes from each of the 4 prediction rows */
    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0]));
    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride]));
    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride]));
    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride]));

    /* Same byte selection as for the source rows */
    pred_r0 = _mm_and_si128(pred_r0, chroma_mask);
    pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
    pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
    pred_r3 = _mm_and_si128(pred_r3, chroma_mask);

    /* Residue = source - prediction, 16 bits per sample */
    src_r0 = _mm_sub_epi16(src_r0, pred_r0);
    src_r1 = _mm_sub_epi16(src_r1, pred_r1);
    src_r2 = _mm_sub_epi16(src_r2, pred_r2);
    src_r3 = _mm_sub_epi16(src_r3, pred_r3);

    /* load upsampled residual values and subtract from
    the previous residue */
    /* Each load reads 8 WORD16 values; only lanes 0..3 survive the unpacklo below */
    /* NOTE(review): residual lanes 0..3 are used as they are, without the byte */
    /* selection applied to source and prediction - confirm the residual layout */
    upsampled_res0 = _mm_loadu_si128((__m128i *) (&pi2_upsampled_res[0]));

    upsampled_res1 = _mm_loadu_si128((__m128i *) (&pi2_upsampled_res[i4_upsampled_res_stride]));

    upsampled_res2 = _mm_loadu_si128((__m128i *) (&pi2_upsampled_res[2 * i4_upsampled_res_stride]));

    upsampled_res3 = _mm_loadu_si128((__m128i *) (&pi2_upsampled_res[3 * i4_upsampled_res_stride]));

    src_r0 = _mm_sub_epi16(src_r0, upsampled_res0);
    src_r1 = _mm_sub_epi16(src_r1, upsampled_res1);
    src_r2 = _mm_sub_epi16(src_r2, upsampled_res2);
    src_r3 = _mm_sub_epi16(src_r3, upsampled_res3);

    /* First step of the transpose, done before clamping so that two registers */
    /* cover all 16 samples: a0 b0 a1 b1 a2 b2 a3 b3 and c0 d0 c1 d1 c2 d2 c3 d3 */
    src_r1 = _mm_unpacklo_epi16(src_r0, src_r1);
    src_r3 = _mm_unpacklo_epi16(src_r2, src_r3);

    /* Saturate all values < -255 to -255 and retain the rest as it is */
    src_r1 = _mm_max_epi16(src_r1, neg_255_8x16b);
    /* Saturate all values > 255 to 255 and retain the rest as it is */
    temp0 = _mm_min_epi16(src_r1, pos_255_8x16b);

    /* Saturate all values < -255 to -255 and retain the rest as it is */
    src_r3 = _mm_max_epi16(src_r3, neg_255_8x16b);
    /* Saturate all values > 255 to 255 and retain the rest as it is */
    temp2 = _mm_min_epi16(src_r3, pos_255_8x16b);

    /* Perform Forward transform */
    /*-------------------------------------------------------------*/
    /* DCT [ Horizontal transformation ]                          */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 a1 a2 a3
     *  b0 b1 b2 b3
     *  c0 c1 c2 c3
     *  d0 d1 d2 d3
     */
    /* a0 b0 c0 d0 a1 b1 c1 d1 */
    temp1 = _mm_unpacklo_epi32(temp0, temp2);
    /* a2 b2 c2 d2 a3 b3 c3 d3 */
    temp3 = _mm_unpackhi_epi32(temp0, temp2);

    /* a0 b0 c0 d0 */
    src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b);
    /* a1 b1 c1 d1 */
    src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b);
    /* a2 b2 c2 d2 */
    src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b);
    /* a3 b3 c3 d3 */
    src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b);

    /*----------------------------------------------------------*/
    /* x0 = z0 + z3 */
    temp0 = _mm_add_epi16(src_r0, src_r3);
    /* x1 = z1 + z2 */
    temp1 = _mm_add_epi16(src_r1, src_r2);
    /* x2 = z1 - z2 */
    temp2 = _mm_sub_epi16(src_r1, src_r2);
    /* x3 = z0 - z3 */
    temp3 = _mm_sub_epi16(src_r0, src_r3);

    /* z0 = x0 + x1 */
    src_r0 = _mm_add_epi16(temp0, temp1);
    /* z1 = (x3 << 1) + x2 */
    src_r1 = _mm_slli_epi16(temp3, 1);
    src_r1 = _mm_add_epi16(src_r1, temp2);
    /* z2 = x0 - x1 */
    src_r2 = _mm_sub_epi16(temp0, temp1);
    /* z3 = x3 - (x2 << 1) */
    src_r3 = _mm_slli_epi16(temp2, 1);
    src_r3 = _mm_sub_epi16(temp3, src_r3);

    /*-------------------------------------------------------------*/
    /* DCT [ Vertical transformation ]                            */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 b0 c0 d0
     *  a1 b1 c1 d1
     *  a2 b2 c2 d2
     *  a3 b3 c3 d3
     */
    /* a0 a1 b0 b1 c0 c1 d0 d1 */
    temp0 = _mm_unpacklo_epi16(src_r0, src_r1);
    /* a2 a3 b2 b3 c2 c3 d2 d3 */
    temp2 = _mm_unpacklo_epi16(src_r2, src_r3);
    /* a0 a1 a2 a3 b0 b1 b2 b3 */
    temp1 = _mm_unpacklo_epi32(temp0, temp2);
    /* c0 c1 c2 c3 d0 d1 d2 d3 */
    temp3 = _mm_unpackhi_epi32(temp0, temp2);

    /* a0 a1 a2 a3 */
    src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b);
    /* b0 b1 b2 b3 */
    src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b);
    /* c0 c1 c2 c3 */
    src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b);
    /* d0 d1 d2 d3 */
    src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b);

    /*----------------------------------------------------------*/
    /* x0 = z0 + z3 */
    temp0 = _mm_add_epi16(src_r0, src_r3);
    /* x1 = z1 + z2 */
    temp1 = _mm_add_epi16(src_r1, src_r2);
    /* x2 = z1 - z2 */
    temp2 = _mm_sub_epi16(src_r1, src_r2);
    /* x3 = z0 - z3 */
    temp3 = _mm_sub_epi16(src_r0, src_r3);

    /* z0 = x0 + x1 */
    src_r0 = _mm_add_epi16(temp0, temp1);
    /* z1 = (x3 << 1) + x2 */
    src_r1 = _mm_slli_epi16(temp3, 1);
    src_r1 = _mm_add_epi16(src_r1, temp2);
    /* z2 = x0 - x1 */
    src_r2 = _mm_sub_epi16(temp0, temp1);
    /* z3 = x3 - (x2 << 1) */
    src_r3 = _mm_slli_epi16(temp2, 1);
    src_r3 = _mm_sub_epi16(temp3, src_r3);

    /* Lane 0 of row 0 is the DC coefficient; it is reported before quantization */
    tmp_dc = _mm_extract_epi16(src_r0, 0);
    *pi2_dc_out = tmp_dc;

    /* a0 a1 a2 a3 b0 b1 b2 b3 */
    src_r0 = _mm_unpacklo_epi64(src_r0, src_r1);
    /* c0 c1 c2 c3 d0 d1 d2 d3 */
    src_r2 = _mm_unpacklo_epi64(src_r2, src_r3);
    /* -1 in every lane holding a negative coefficient, 0 elsewhere */
    sign_reg0 = _mm_cmpgt_epi16(zero_8x16b, src_r0);
    sign_reg2 = _mm_cmpgt_epi16(zero_8x16b, src_r2);

    /* -2 for negative lanes, 0 otherwise */
    sign_reg0 = _mm_mullo_epi16(temp_2, sign_reg0);
    sign_reg2 = _mm_mullo_epi16(temp_2, sign_reg2);

    /* -1 for negative lanes, +1 otherwise; consumed by _mm_sign_epi16 below */
    sign_reg0 = _mm_add_epi16(temp_1, sign_reg0);
    sign_reg2 = _mm_add_epi16(temp_1, sign_reg2);

    /* Quantization works on magnitudes */
    src_r0 = _mm_abs_epi16(src_r0);
    src_r2 = _mm_abs_epi16(src_r2);

    /* All ones in lanes where threshold > |coeff|; those lanes are zeroed after quantization */
    threshold_mask_r0_r1 = _mm_cmpgt_epi16(threshold_r0_r1, src_r0);
    threshold_mask_r2_r3 = _mm_cmpgt_epi16(threshold_r2_r3, src_r2);

    /* Widen the 16 magnitudes to 32 bits, one row per register */
    src_r1 = _mm_srli_si128(src_r0, 8);
    src_r0 = _mm_cvtepu16_epi32(src_r0);
    src_r1 = _mm_cvtepu16_epi32(src_r1);
    src_r3 = _mm_srli_si128(src_r2, 8);
    src_r2 = _mm_cvtepu16_epi32(src_r2);
    src_r3 = _mm_cvtepu16_epi32(src_r3);

    /* Widen the scale matrix the same way, one row per register */
    temp0 = _mm_cvtepu16_epi32(scalemat_r0_r1);
    scalemat_r0_r1 = _mm_srli_si128(scalemat_r0_r1, 8);
    temp2 = _mm_cvtepu16_epi32(scalemat_r2_r3);
    scalemat_r2_r3 = _mm_srli_si128(scalemat_r2_r3, 8);
    temp1 = _mm_cvtepu16_epi32(scalemat_r0_r1);
    temp3 = _mm_cvtepu16_epi32(scalemat_r2_r3);

    /* |coeff| * scale */
    temp0 = _mm_mullo_epi32(temp0, src_r0);
    temp1 = _mm_mullo_epi32(temp1, src_r1);
    temp2 = _mm_mullo_epi32(temp2, src_r2);
    temp3 = _mm_mullo_epi32(temp3, src_r3);

    /* + round factor */
    temp0 = _mm_add_epi32(temp0, rnd_fact);
    temp1 = _mm_add_epi32(temp1, rnd_fact);
    temp2 = _mm_add_epi32(temp2, rnd_fact);
    temp3 = _mm_add_epi32(temp3, rnd_fact);

    /* >> qbits */
    temp0 = _mm_srli_epi32(temp0, u4_qbits);
    temp1 = _mm_srli_epi32(temp1, u4_qbits);
    temp2 = _mm_srli_epi32(temp2, u4_qbits);
    temp3 = _mm_srli_epi32(temp3, u4_qbits);

    /* Back to 16 bits with signed saturation: rows 0,1 in temp0 and rows 2,3 in temp2 */
    temp0 = _mm_packs_epi32(temp0, temp1);
    temp2 = _mm_packs_epi32(temp2, temp3);

    /* Restore the sign of every coefficient */
    temp0 = _mm_sign_epi16(temp0, sign_reg0);
    temp2 = _mm_sign_epi16(temp2, sign_reg2);

    /* Zero the lanes that were below the threshold */
    temp0 = _mm_andnot_si128(threshold_mask_r0_r1, temp0);
    temp2 = _mm_andnot_si128(threshold_mask_r2_r3, temp2);

    _mm_storeu_si128((__m128i *) (&pi2_out[0]), temp0);
    _mm_storeu_si128((__m128i *) (&pi2_out[8]), temp2);

    /* Count the zero coefficients: all ones in every lane that quantized to zero */
    cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b);
    cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b);

    mask0 = _mm_movemask_epi8(cmp0);
    mask1 = _mm_movemask_epi8(cmp1);
    u4_zero_coeff = 0;
    if(mask0)
    {
        if(mask0 == 0xffff)
            u4_zero_coeff += 8;
        else
        {
            /* Turn each all-ones lane into 1 and sum the 8 lanes into lane 0 */
            cmp0 = _mm_and_si128(temp_1, cmp0);
            sum0 = _mm_hadd_epi16(cmp0, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }
    if(mask1)
    {
        if(mask1 == 0xffff)
            u4_zero_coeff += 8;
        else
        {
            cmp1 = _mm_and_si128(temp_1, cmp1);
            sum0 = _mm_hadd_epi16(cmp1, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }

    /* Return total nonzero coefficients in the current sub block */
    u4_nonzero_coeff = 16 - u4_zero_coeff;
    *pu1_nnz = u4_nonzero_coeff;
}

/**
 *******************************************************************************
 *
 * @brief
 *  This function performs forward hadamard transform and quantization on a
 *  4*4 block
 *
 * @par Description:
 *  The function reads a 4x4 block of 16-bit values, applies the hadamard
 *  transform along both dimensions, halves the result and quantizes every
 *  value with a single scale factor. The quantized values are written to a
 *  separate destination buffer.
 *
 * @param[in] pi2_src
 *  Pointer to the 16 input values (4x4 block, stored contiguously)
 *
 * @param[out] pi2_dst
 *  Pointer to the 16 quantized output values (stored contiguously)
 *
 * @param[in] ps_quant_constants
 *  Supplies pu2_scale_matrix (only entry 0 is used), u4_qbits and
 *  u4_round_factor. pu2_threshold_matrix is read but not used
 *
 * @param[out] pu1_nnz
 *  pu1_nnz[0] receives the number of non-zero quantized values in the block
 *
 * @returns
 *  None
 *
 * @remarks
 *  None
 *
 */

void isvc_hadamard_quant_4x4_sse42(WORD16 *pi2_src, WORD16 *pi2_dst,
                                   resi_trans_quant_constants_t *ps_quant_constants,
                                   UWORD8 *pu1_nnz)
{
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
    WORD32 u4_zero_coeff, u4_nonzero_coeff = 0;
    __m128i cmp0, cmp1, sum0, sum1, sum2;
    WORD32 mask0, mask1;
    __m128i src_r0_r1, src_r2_r3, sign_reg;
    __m128i src_r0, src_r1, src_r2, src_r3;
    __m128i zero_8x16b = _mm_setzero_si128();
    __m128i temp0, temp1, temp2, temp3;
    __m128i sign_reg0, sign_reg1, sign_reg2, sign_reg3;
    __m128i temp_1 = _mm_set1_epi16(1);
    __m128i rnd_fact = _mm_set1_epi32(u4_round_factor);
    /* The same scale factor, pu2_scale_matrix[0], is applied to all 16 values */
    __m128i scale_val = _mm_set1_epi32(pu2_scale_matrix[0]);

    UNUSED(pu2_threshold_matrix);

    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src));  // a00 a01 a02 a03 a10 a11 a12 a13 -- the
                                                         // source matrix 0th,1st row
    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8));  // a20 a21 a22 a23 a30 a31 a32 a33 --
                                                             // the source matrix 2nd,3rd row
    /* Sign-extend the 16-bit inputs to 32 bits by interleaving them with their sign mask */
    sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r0_r1);
    src_r0 = _mm_unpacklo_epi16(src_r0_r1, sign_reg);  // a0 a1 a2 a3
    src_r1 = _mm_unpackhi_epi16(src_r0_r1, sign_reg);  // b0 b1 b2 b3
    sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r2_r3);
    src_r2 = _mm_unpacklo_epi16(src_r2_r3, sign_reg);  // c0 c1 c2 c3
    src_r3 = _mm_unpackhi_epi16(src_r2_r3, sign_reg);  // d0 d1 d2 d3

    /* Perform Forward transform */
    /*-------------------------------------------------------------*/
    /* Forward DC transform [ Horizontal transformation ]          */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 a1 a2 a3
     *  b0 b1 b2 b3
     *  c0 c1 c2 c3
     *  d0 d1 d2 d3
     */
    temp0 = _mm_unpacklo_epi32(src_r0, src_r1);  // a0 b0 a1 b1
    temp2 = _mm_unpacklo_epi32(src_r2, src_r3);  // c0 d0 c1 d1
    temp1 = _mm_unpackhi_epi32(src_r0, src_r1);  // a2 b2 a3 b3
    temp3 = _mm_unpackhi_epi32(src_r2, src_r3);  // c2 d2 c3 d3
    src_r0 = _mm_unpacklo_epi64(temp0, temp2);   // a0 b0 c0 d0
    src_r1 = _mm_unpackhi_epi64(temp0, temp2);   // a1 b1 c1 d1
    src_r2 = _mm_unpacklo_epi64(temp1, temp3);   // a2 b2 c2 d2
    src_r3 = _mm_unpackhi_epi64(temp1, temp3);   // a3 b3 c3 d3

    /* Butterfly, first stage: sums and differences of the outer and inner pairs */
    temp0 = _mm_add_epi32(src_r0, src_r3);
    temp1 = _mm_add_epi32(src_r1, src_r2);
    temp2 = _mm_sub_epi32(src_r1, src_r2);
    temp3 = _mm_sub_epi32(src_r0, src_r3);

    /* Butterfly, second stage */
    src_r0 = _mm_add_epi32(temp0, temp1);
    src_r1 = _mm_add_epi32(temp2, temp3);
    src_r2 = _mm_sub_epi32(temp0, temp1);
    src_r3 = _mm_sub_epi32(temp3, temp2);

    /*-------------------------------------------------------------*/
    /* Forward DC transform [ Vertical transformation ]            */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 b0 c0 d0
     *  a1 b1 c1 d1
     *  a2 b2 c2 d2
     *  a3 b3 c3 d3
     */
    temp0 = _mm_unpacklo_epi32(src_r0, src_r1);  // a0 a1 b0 b1
    temp2 = _mm_unpacklo_epi32(src_r2, src_r3);  // a2 a3 b2 b3
    temp1 = _mm_unpackhi_epi32(src_r0, src_r1);  // c0 c1 d0 d1
    temp3 = _mm_unpackhi_epi32(src_r2, src_r3);  // c2 c3 d2 d3
    src_r0 = _mm_unpacklo_epi64(temp0, temp2);   // a0 a1 a2 a3
    src_r1 = _mm_unpackhi_epi64(temp0, temp2);   // b0 b1 b2 b3
    src_r2 = _mm_unpacklo_epi64(temp1, temp3);   // c0 c1 c2 c3
    src_r3 = _mm_unpackhi_epi64(temp1, temp3);   // d0 d1 d2 d3

    /* Butterfly, first stage */
    temp0 = _mm_add_epi32(src_r0, src_r3);
    temp1 = _mm_add_epi32(src_r1, src_r2);
    temp2 = _mm_sub_epi32(src_r1, src_r2);
    temp3 = _mm_sub_epi32(src_r0, src_r3);

    /* Butterfly, second stage */
    src_r0 = _mm_add_epi32(temp0, temp1);
    src_r1 = _mm_add_epi32(temp2, temp3);
    src_r2 = _mm_sub_epi32(temp0, temp1);
    src_r3 = _mm_sub_epi32(temp3, temp2);

    /* Halve the transformed values (arithmetic shift keeps the sign) */
    src_r0 = _mm_srai_epi32(src_r0, 1);
    src_r1 = _mm_srai_epi32(src_r1, 1);
    src_r2 = _mm_srai_epi32(src_r2, 1);
    src_r3 = _mm_srai_epi32(src_r3, 1);

    // Quantization
    sign_reg0 =
        _mm_cmpgt_epi32(zero_8x16b, src_r0);  // Find sign of each value for later restoration
    sign_reg1 = _mm_cmpgt_epi32(zero_8x16b, src_r1);
    sign_reg2 = _mm_cmpgt_epi32(zero_8x16b, src_r2);
    sign_reg3 = _mm_cmpgt_epi32(zero_8x16b, src_r3);

    sign_reg0 = _mm_packs_epi32(sign_reg0,
                                sign_reg1);  // Sign = -1 or 0 depending on <0 or >=0 respectively
    sign_reg2 = _mm_packs_epi32(sign_reg2, sign_reg3);

    sign_reg0 = _mm_slli_epi16(sign_reg0, 1);  // Sign = -2 or 0 depending on <0 or >=0 respectively
    sign_reg2 = _mm_slli_epi16(sign_reg2, 1);

    sign_reg0 =
        _mm_add_epi16(temp_1, sign_reg0);  // Sign = -1 or 1 depending on <0 or >=0 respectively
    sign_reg2 = _mm_add_epi16(temp_1, sign_reg2);

    src_r0 = _mm_abs_epi32(src_r0);  // Absolute values
    src_r1 = _mm_abs_epi32(src_r1);
    src_r2 = _mm_abs_epi32(src_r2);
    src_r3 = _mm_abs_epi32(src_r3);

    temp0 = _mm_mullo_epi32(scale_val, src_r0);  // multiply by
                                                 // pu2_scale_matrix[0]
    temp1 = _mm_mullo_epi32(scale_val, src_r1);
    temp2 = _mm_mullo_epi32(scale_val, src_r2);
    temp3 = _mm_mullo_epi32(scale_val, src_r3);

    temp0 = _mm_add_epi32(temp0, rnd_fact);  // Add round factor
    temp1 = _mm_add_epi32(temp1, rnd_fact);
    temp2 = _mm_add_epi32(temp2, rnd_fact);
    temp3 = _mm_add_epi32(temp3, rnd_fact);

    temp0 = _mm_srli_epi32(temp0,
                           u4_qbits);  // Right shift by qbits; a logical shift is used
                                       // because the operands are magnitudes
    temp1 = _mm_srli_epi32(temp1, u4_qbits);
    temp2 = _mm_srli_epi32(temp2, u4_qbits);
    temp3 = _mm_srli_epi32(temp3, u4_qbits);

    temp0 = _mm_packs_epi32(temp0, temp1);  // Narrow to 16 bits with signed saturation
    temp2 = _mm_packs_epi32(temp2, temp3);

    temp0 = _mm_sign_epi16(temp0, sign_reg0);  // Sign restoration
    temp2 = _mm_sign_epi16(temp2, sign_reg2);

    _mm_storeu_si128((__m128i *) (&pi2_dst[0]), temp0);
    _mm_storeu_si128((__m128i *) (&pi2_dst[8]), temp2);

    /* Count the zero values: all ones in every lane that quantized to zero */
    cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b);
    cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b);

    mask0 = _mm_movemask_epi8(cmp0);
    mask1 = _mm_movemask_epi8(cmp1);
    u4_zero_coeff = 0;
    if(mask0)
    {
        if(mask0 == 0xffff)
            u4_zero_coeff += 8;
        else
        {
            /* Turn each all-ones lane into 1 and sum the 8 lanes into lane 0 */
            cmp0 = _mm_and_si128(temp_1, cmp0);
            sum0 = _mm_hadd_epi16(cmp0, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }
    if(mask1)
    {
        if(mask1 == 0xffff)
            u4_zero_coeff += 8;
        else
        {
            cmp1 = _mm_and_si128(temp_1, cmp1);
            sum0 = _mm_hadd_epi16(cmp1, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }

    /* Return total nonzero coefficients in the current sub block */
    u4_nonzero_coeff = 16 - u4_zero_coeff;
    pu1_nnz[0] = u4_nonzero_coeff;
}

/**
 *******************************************************************************
 *
 * @brief
 *  This function performs forward hadamard transform and quantization on a
 *  2*2 block for both U and V planes
 *
 * @par Description:
 *  The function accepts source buffer and estimation buffer. From these, it
 *  computes the residue.
This residue is then transformed
+ * and quantized.
+ *
+ * NOTE(review): the implementation below does not compute a residue itself.
+ * It reads eight 16-bit values from pi2_src, treats the first four and the
+ * last four as two independent 2x2 groups (presumably the U and V chroma DC
+ * terms - confirm against the caller), applies a 2x2 Hadamard transform to
+ * each group and writes the quantized result to pi2_dst.
+ *
+ * Per group, with inputs s0..s3, the outputs are stored in the order
+ *   s0+s1+s2+s3, s0-s1+s2-s3, s0+s1-s2-s3, s0-s1-s2+s3
+ * and each value x is quantized as
+ *   sign(x) * ((|x| * pu2_scale_matrix[0] + u4_round_factor) >> u4_qbits)
+ * where the magnitude is packed to 16 bits with signed saturation.
+ *
+ * @param[in] pi2_src
+ *  Pointer to eight consecutive 16-bit input values (read with one
+ *  unaligned 128-bit load)
+ *
+ * @param[out] pi2_dst
+ *  Pointer to eight 16-bit quantized outputs; group 0 occupies [0..3] and
+ *  group 1 occupies [4..7] (written with one unaligned 128-bit store)
+ *
+ * @param[in] ps_quant_constants
+ *  Quantization constants. Only pu2_scale_matrix[0], u4_qbits and
+ *  u4_round_factor are used; pu2_threshold_matrix is fetched but ignored
+ *
+ * @param[out] pu1_nnz
+ *  pu1_nnz[0] and pu1_nnz[1] receive the number of non-zero quantized
+ *  coefficients of group 0 and group 1 respectively
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  The NNZ counts are written to positions 0 and 1 of pu1_nnz
+ *
+ */
+
+void isvc_hadamard_quant_2x2_uv_sse42(WORD16 *pi2_src, WORD16 *pi2_dst,
+                                      resi_trans_quant_constants_t *ps_quant_constants,
+                                      UWORD8 *pu1_nnz)
+{
+    /* Scalar quantization parameters */
+    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
+    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
+    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
+    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
+
+    /* Constant vectors */
+    __m128i all_zero = _mm_setzero_si128();
+    __m128i ones_16 = _mm_set1_epi16(1);
+    __m128i scale_32 = _mm_set1_epi32(pu2_scale_matrix[0]);
+    __m128i round_32 = _mm_set1_epi32(u4_round_factor);
+
+    __m128i in_16, in_sign;
+    __m128i grp0, grp1, stage0, stage1;
+    __m128i neg0, neg1, sign_16;
+    __m128i mag0, mag1, quant_16;
+    __m128i is_zero, zero_flags, pair_sum, quad_sum;
+    WORD32 zero_mask, zero_mask_0, zero_mask_1;
+    WORD32 num_zero_0 = 0, num_zero_1 = 0;
+
+    UNUSED(pu2_threshold_matrix);
+
+    /* Load a0 a1 a2 a3 b0 b1 b2 b3 and sign-extend each half to 32 bits by */
+    /* interleaving the values with their own sign masks */
+    in_16 = _mm_loadu_si128((__m128i *) pi2_src);
+    in_sign = _mm_cmpgt_epi16(all_zero, in_16);
+    grp0 = _mm_unpacklo_epi16(in_16, in_sign); /* a0 a1 a2 a3 */
+    grp1 = _mm_unpackhi_epi16(in_16, in_sign); /* b0 b1 b2 b3 */
+
+    /* First butterfly stage: pairwise sums and differences */
+    stage0 = _mm_hadd_epi32(grp0, grp1); /* a0+a1 a2+a3 b0+b1 b2+b3 */
+    stage1 = _mm_hsub_epi32(grp0, grp1); /* a0-a1 a2-a3 b0-b1 b2-b3 */
+
+    /* Second butterfly stage */
+    /* grp0: a0+a1+a2+a3 b0+b1+b2+b3 a0-a1+a2-a3 b0-b1+b2-b3 */
+    /* grp1: a0+a1-a2-a3 b0+b1-b2-b3 a0-a1-a2+a3 b0-b1-b2+b3 */
+    grp0 = _mm_hadd_epi32(stage0, stage1);
+    grp1 = _mm_hsub_epi32(stage0, stage1);
+
+    /* De-interleave so that each register holds the terms of one group */
+    stage0 = _mm_unpacklo_epi32(grp0, grp1);
+    stage1 = _mm_unpackhi_epi32(grp0, grp1);
+    grp0 = _mm_unpacklo_epi64(stage0, stage1); /* all four 'a' terms */
+    grp1 = _mm_unpackhi_epi64(stage0, stage1); /* all four 'b' terms */
+
+    /* 0xd8 swaps the two middle lanes, giving the final order */
+    /* s0+s1+s2+s3 s0-s1+s2-s3 s0+s1-s2-s3 s0-s1-s2+s3 */
+    grp0 = _mm_shuffle_epi32(grp0, 0xd8);
+    grp1 = _mm_shuffle_epi32(grp1, 0xd8);
+
+    /* Build a 16-bit vector holding -1 for negative terms and +1 otherwise: */
+    /* compare mask (-1 / 0) -> doubled (-2 / 0) -> plus one (-1 / +1) */
+    neg0 = _mm_cmpgt_epi32(all_zero, grp0);
+    neg1 = _mm_cmpgt_epi32(all_zero, grp1);
+    sign_16 = _mm_packs_epi32(neg0, neg1);
+    sign_16 = _mm_slli_epi16(sign_16, 1);
+    sign_16 = _mm_add_epi16(ones_16, sign_16);
+
+    /* Quantize the magnitudes: (|x| * scale + round) >> qbits. A logical */
+    /* right shift is sufficient because the operands are magnitudes */
+    mag0 = _mm_abs_epi32(grp0);
+    mag1 = _mm_abs_epi32(grp1);
+
+    mag0 = _mm_mullo_epi32(scale_32, mag0);
+    mag1 = _mm_mullo_epi32(scale_32, mag1);
+
+    mag0 = _mm_add_epi32(mag0, round_32);
+    mag1 = _mm_add_epi32(mag1, round_32);
+
+    mag0 = _mm_srli_epi32(mag0, u4_qbits);
+    mag1 = _mm_srli_epi32(mag1, u4_qbits);
+
+    /* Narrow to 16 bits, put the signs back and write all eight results */
+    quant_16 = _mm_packs_epi32(mag0, mag1);
+    quant_16 = _mm_sign_epi16(quant_16, sign_16);
+
+    _mm_storeu_si128((__m128i *) (&pi2_dst[0]), quant_16);
+
+    /* Count the zero outputs of each group. Every 16-bit lane contributes */
+    /* two bits to the byte mask, so each group owns one byte of it */
+    is_zero = _mm_cmpeq_epi16(quant_16, all_zero);
+    zero_mask = _mm_movemask_epi8(is_zero);
+    zero_mask_0 = zero_mask & 0xff;
+    zero_mask_1 = zero_mask >> 8;
+
+    if(0xff == zero_mask_0)
+    {
+        /* Every coefficient of group 0 is zero */
+        num_zero_0 = 4;
+    }
+    else if(zero_mask_0)
+    {
+        /* Turn the compare mask into 0/1 flags and add them horizontally */
+        zero_flags = _mm_and_si128(ones_16, is_zero);
+        pair_sum = _mm_hadd_epi16(zero_flags, all_zero);
+        quad_sum = _mm_hadd_epi16(pair_sum, all_zero);
+
+        /* Lane 0 is the group 0 total; lane 1 (group 1) is masked away */
+        num_zero_0 = _mm_cvtsi128_si32(quad_sum) & 0xffff;
+    }
+
+    if(0xff == zero_mask_1)
+    {
+        /* Every coefficient of group 1 is zero */
+        num_zero_1 = 4;
+    }
+    else if(zero_mask_1)
+    {
+        /* Move the group 1 lanes to the bottom so that lane 1 ends up zero */
+        zero_flags = _mm_srli_si128(is_zero, 8);
+        zero_flags = _mm_and_si128(ones_16, zero_flags);
+        pair_sum = _mm_hadd_epi16(zero_flags, all_zero);
+        quad_sum = _mm_hadd_epi16(pair_sum, all_zero);
+
+        num_zero_1 = _mm_cvtsi128_si32(quad_sum);
+    }
+
+    /* Report non-zero counts: four coefficients per group minus the zeros */
+    pu1_nnz[0] = 4 - num_zero_0;
+    pu1_nnz[1] = 4 - num_zero_1;
+}
diff --git a/encoder/arm/svc/isvce_downscaler_neon.c b/encoder/arm/svc/isvce_downscaler_neon.c
new file mode 100644
index 0000000..9f9bef4
--- /dev/null
+++ b/encoder/arm/svc/isvce_downscaler_neon.c
@@ -0,0 +1,927 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file ih264e_downscaler_neon.c +* +* @brief +* This file contains the ARMV8 SIMD version of the function which does +* horizontal scaling and transpose +* +* @author +* Ittiam +* +* @par List of Functions: +* - ih264e_horizontal_downscale_and_transpose_av8() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "isvc_defs.h" +#include "isvce_defs.h" +#include "isvc_structs.h" +#include "isvce_downscaler_private_defs.h" + +void isvce_horizontal_downscale_and_transpose_neon( + downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst, + FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma) +{ + WORD32 i, j; + UWORD8 u1_phase; + UWORD8 *pu1_src_j, *pu1_dst_j; + UWORD8 *pu1_in_pixel; + UWORD8 *pu1_out_pixel; + WORD8 *pi1_filter_grid; + UWORD16 u2_full_pixel_inc; + UWORD32 u4_num_iterations_vertical_by_16, u4_num_iterations_vertical_by_8; + UWORD32 u4_rem_vert_loop_by_8, u4_rem_vert_loop_by_4; + UWORD32 u4_rem_vert_loop; + UWORD32 u4_height_finished; + + uint8x8_t reg_8x8_src_r0, reg_8x8_src_r1, reg_8x8_src_r2, reg_8x8_src_r3, reg_8x8_src_r4, + reg_8x8_src_r5, reg_8x8_src_r6, reg_8x8_src_r7; + + uint16x8_t reg_16x8_src_r0, 
reg_16x8_src_r1, reg_16x8_src_r2, reg_16x8_src_r3, reg_16x8_src_r4, + reg_16x8_src_r5, reg_16x8_src_r6, reg_16x8_src_r7; + + int16x8_t reg_16x8_mul_r0, reg_16x8_mul_r1, reg_16x8_mul_r2, reg_16x8_mul_r3, reg_16x8_mul_r4, + reg_16x8_mul_r5, reg_16x8_mul_r6, reg_16x8_mul_r7; + + int32x4_t reg_32x4_sum_r0, reg_32x4_sum_r1, reg_32x4_sum_r2, reg_32x4_sum_r3, reg_32x4_sum_r4, + reg_32x4_sum_r5, reg_32x4_sum_r6, reg_32x4_sum_r7; + + int32x4_t reg_32x4_sum_r01, reg_32x4_sum_r23, reg_32x4_sum_r45, reg_32x4_sum_r67, + reg_32x4_sum_r89, reg_32x4_sum_r1011, reg_32x4_sum_r1213, reg_32x4_sum_r1415; + + uint8x8_t reg_8x8_src_r8, reg_8x8_src_r9, reg_8x8_src_r10, reg_8x8_src_r11, reg_8x8_src_r12, + reg_8x8_src_r13, reg_8x8_src_r14, reg_8x8_src_r15; + + uint16x8_t reg_16x8_src_r8, reg_16x8_src_r9, reg_16x8_src_r10, reg_16x8_src_r11, + reg_16x8_src_r12, reg_16x8_src_r13, reg_16x8_src_r14, reg_16x8_src_r15; + + int16x8_t reg_16x8_mul_r8, reg_16x8_mul_r9, reg_16x8_mul_r10, reg_16x8_mul_r11, + reg_16x8_mul_r12, reg_16x8_mul_r13, reg_16x8_mul_r14, reg_16x8_mul_r15; + + int32x4_t reg_32x4_sum_r8, reg_32x4_sum_r9, reg_32x4_sum_r10, reg_32x4_sum_r11, + reg_32x4_sum_r12, reg_32x4_sum_r13, reg_32x4_sum_r14, reg_32x4_sum_r15; + + uint8x16_t reg_8x16_src_r0, reg_8x16_src_r1, reg_8x16_src_r2, reg_8x16_src_r3, reg_8x16_src_r4, + reg_8x16_src_r5, reg_8x16_src_r6, reg_8x16_src_r7; + + uint16x8_t reg_16x8_src_cb_r0, reg_16x8_src_cb_r1, reg_16x8_src_cb_r2, reg_16x8_src_cb_r3, + reg_16x8_src_cb_r4, reg_16x8_src_cb_r5, reg_16x8_src_cb_r6, reg_16x8_src_cb_r7; + + uint16x8_t reg_16x8_src_cr_r0, reg_16x8_src_cr_r1, reg_16x8_src_cr_r2, reg_16x8_src_cr_r3, + reg_16x8_src_cr_r4, reg_16x8_src_cr_r5, reg_16x8_src_cr_r6, reg_16x8_src_cr_r7; + + int16x8_t reg_16x8_mul_cb_r0, reg_16x8_mul_cb_r1, reg_16x8_mul_cb_r2, reg_16x8_mul_cb_r3, + reg_16x8_mul_cb_r4, reg_16x8_mul_cb_r5, reg_16x8_mul_cb_r6, reg_16x8_mul_cb_r7; + + int16x8_t reg_16x8_mul_cr_r0, reg_16x8_mul_cr_r1, reg_16x8_mul_cr_r2, reg_16x8_mul_cr_r3, + 
reg_16x8_mul_cr_r4, reg_16x8_mul_cr_r5, reg_16x8_mul_cr_r6, reg_16x8_mul_cr_r7; + + int32x4_t reg_32x4_sum_cb_r0, reg_32x4_sum_cb_r1, reg_32x4_sum_cb_r2, reg_32x4_sum_cb_r3, + reg_32x4_sum_cb_r4, reg_32x4_sum_cb_r5, reg_32x4_sum_cb_r6, reg_32x4_sum_cb_r7; + + int32x4_t reg_32x4_sum_cr_r0, reg_32x4_sum_cr_r1, reg_32x4_sum_cr_r2, reg_32x4_sum_cr_r3, + reg_32x4_sum_cr_r4, reg_32x4_sum_cr_r5, reg_32x4_sum_cr_r6, reg_32x4_sum_cr_r7; + + int32x4_t reg_32x4_sum_cb_r01, reg_32x4_sum_cb_r23, reg_32x4_sum_cb_r45, reg_32x4_sum_cb_r67; + uint16x4_t reg_16x4_sum_cb_r01_23, reg_16x4_sum_cb_r45_67; + uint16x8_t reg_16x8_sum_cb_r0_r7; + uint8x8_t reg_8x8_sum_cb_r0_r7; + + int32x4_t reg_32x4_sum_cr_r01, reg_32x4_sum_cr_r23, reg_32x4_sum_cr_r45, reg_32x4_sum_cr_r67; + uint16x4_t reg_16x4_sum_cr_r01_23, reg_16x4_sum_cr_r45_67; + uint16x8_t reg_16x8_sum_cr_r0_r7; + uint8x8_t reg_8x8_sum_cr_r0_r7; + uint16x8_t reg_16x8_sum_cb_cr_r0_r3; + uint8x8_t reg_8x8_sum_cb_cr_r0_r3; + + int32x4_t reg_32x4_sum_cb_cr_r0; + uint16x4_t reg_16x4_sum_cb_cr_r0; + + int32x4_t reg_32x4_zero = vdupq_n_s32(0); + + uint16x4_t reg_16x4_sum_r01_23, reg_16x4_sum_r45_67; + uint16x4_t reg_16x4_sum_r8_r11, reg_16x4_sum_r12_r15; + uint16x8_t reg_16x8_sum_r0_r7, reg_16x8_sum_r8_r15; + uint8x8_t reg_8x8_sum_r0_r7, reg_8x8_sum_r8_r15; + uint8x16_t reg_8x16_sum_r0_r15; + int8x8_t reg_8x8_filt_coeff_grid; + int16x8_t reg_16x8_filt_coeff_grid; + int32x4x2_t reg_32x4x2_sum_r01, reg_32x4x2_sum_r23, reg_32x4x2_sum_r45, reg_32x4x2_sum_r67; + int32x4x2_t reg_32x4x2_sum_r89, reg_32x4x2_sum_r1011, reg_32x4x2_sum_r1213, + reg_32x4x2_sum_r1415; + uint8x16x2_t reg_8x16x2_src_r0, reg_8x16x2_src_r1, reg_8x16x2_src_r2, reg_8x16x2_src_r3; + + downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state; + + UWORD32 u4_center_pixel_pos = ps_scaler_state->i4_init_offset; + UWORD32 u4_src_vert_increments = ps_scaler_state->u4_vert_increment; + UWORD32 u4_src_horz_increments = ps_scaler_state->u4_horz_increment; 
+ UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data; + UWORD32 u4_in_stride = ps_src->i4_data_stride; + UWORD8 *pu1_dst = (UWORD8 *) ps_dst->pv_data; + UWORD32 u4_out_stride = ps_dst->i4_data_stride; + UWORD32 u4_center_pixel_pos_src = u4_center_pixel_pos; + + /* Offset the input so that the input pixel to be processed + co-incides with the centre of filter (4th coefficient)*/ + pu1_src += (1 + u1_is_chroma); + + ASSERT((1 << DOWNSCALER_Q) == u4_src_vert_increments); + + if(!u1_is_chroma) + { + u4_num_iterations_vertical_by_16 = u4_blk_ht >> 4; + u4_rem_vert_loop = u4_blk_ht % 16; + + for(j = 0; j < (WORD32) u4_num_iterations_vertical_by_16; j++) + { + pu1_src_j = pu1_src + ((j << 4) * u4_in_stride); + pu1_dst_j = pu1_dst + (j << 4); + + u4_center_pixel_pos = u4_center_pixel_pos_src; + + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + + pi1_filter_grid = pai1_filters[u1_phase]; + + /* Doing the Calculation for current Loop Count */ + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); + + reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid); + + /******************************************************/ + /* This loop is going vertically in bottom direction */ + /* but the output pixels are stored in horizontal */ + /* direction in transpose manner */ + /******************************************************/ + + /* r0-r7 */ + reg_8x8_src_r0 = vld1_u8(pu1_in_pixel); + reg_8x8_src_r1 = vld1_u8(pu1_in_pixel + u4_in_stride); + reg_8x8_src_r2 = vld1_u8(pu1_in_pixel + 2 * u4_in_stride); + reg_8x8_src_r3 = vld1_u8(pu1_in_pixel + 3 * u4_in_stride); + reg_8x8_src_r4 = vld1_u8(pu1_in_pixel + 4 * u4_in_stride); + reg_8x8_src_r5 = vld1_u8(pu1_in_pixel + 5 * u4_in_stride); + reg_8x8_src_r6 = vld1_u8(pu1_in_pixel + 6 * u4_in_stride); + reg_8x8_src_r7 = vld1_u8(pu1_in_pixel + 7 * u4_in_stride); + 
+ /* r0-r7 */ + reg_16x8_src_r0 = vmovl_u8(reg_8x8_src_r0); + reg_16x8_src_r1 = vmovl_u8(reg_8x8_src_r1); + reg_16x8_src_r2 = vmovl_u8(reg_8x8_src_r2); + reg_16x8_src_r3 = vmovl_u8(reg_8x8_src_r3); + reg_16x8_src_r4 = vmovl_u8(reg_8x8_src_r4); + reg_16x8_src_r5 = vmovl_u8(reg_8x8_src_r5); + reg_16x8_src_r6 = vmovl_u8(reg_8x8_src_r6); + reg_16x8_src_r7 = vmovl_u8(reg_8x8_src_r7); + + /* r8-r15 */ + reg_8x8_src_r8 = vld1_u8(pu1_in_pixel + 8 * u4_in_stride); + reg_8x8_src_r9 = vld1_u8(pu1_in_pixel + 9 * u4_in_stride); + reg_8x8_src_r10 = vld1_u8(pu1_in_pixel + 10 * u4_in_stride); + reg_8x8_src_r11 = vld1_u8(pu1_in_pixel + 11 * u4_in_stride); + reg_8x8_src_r12 = vld1_u8(pu1_in_pixel + 12 * u4_in_stride); + reg_8x8_src_r13 = vld1_u8(pu1_in_pixel + 13 * u4_in_stride); + reg_8x8_src_r14 = vld1_u8(pu1_in_pixel + 14 * u4_in_stride); + reg_8x8_src_r15 = vld1_u8(pu1_in_pixel + 15 * u4_in_stride); + + reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid); + + /*r0-r7 */ + reg_16x8_mul_r0 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r0), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r1 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r1), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r2 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r2), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r3 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r3), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r4 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r4), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r5 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r5), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r6 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r6), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r7 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r7), reg_16x8_filt_coeff_grid); + + /* r8-r15 */ + reg_16x8_src_r8 = vmovl_u8(reg_8x8_src_r8); + reg_16x8_src_r9 = vmovl_u8(reg_8x8_src_r9); + reg_16x8_src_r10 = vmovl_u8(reg_8x8_src_r10); + reg_16x8_src_r11 = vmovl_u8(reg_8x8_src_r11); + reg_16x8_src_r12 = 
vmovl_u8(reg_8x8_src_r12); + reg_16x8_src_r13 = vmovl_u8(reg_8x8_src_r13); + reg_16x8_src_r14 = vmovl_u8(reg_8x8_src_r14); + reg_16x8_src_r15 = vmovl_u8(reg_8x8_src_r15); + + /* r0-r7 */ + reg_32x4_sum_r0 = vpaddlq_s16(reg_16x8_mul_r0); + reg_32x4_sum_r1 = vpaddlq_s16(reg_16x8_mul_r1); + reg_32x4_sum_r2 = vpaddlq_s16(reg_16x8_mul_r2); + reg_32x4_sum_r3 = vpaddlq_s16(reg_16x8_mul_r3); + reg_32x4_sum_r4 = vpaddlq_s16(reg_16x8_mul_r4); + reg_32x4_sum_r5 = vpaddlq_s16(reg_16x8_mul_r5); + reg_32x4_sum_r6 = vpaddlq_s16(reg_16x8_mul_r6); + reg_32x4_sum_r7 = vpaddlq_s16(reg_16x8_mul_r7); + + /* r8-r15 */ + reg_16x8_mul_r8 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r8), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r9 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r9), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r10 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r10), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r11 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r11), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r12 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r12), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r13 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r13), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r14 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r14), reg_16x8_filt_coeff_grid); + reg_16x8_mul_r15 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r15), reg_16x8_filt_coeff_grid); + + /* r0-r7 */ + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r0, reg_32x4_sum_r1); + reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_r2, reg_32x4_sum_r3); + reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r4, reg_32x4_sum_r5); + reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_r6, reg_32x4_sum_r7); + + reg_32x4_sum_r01 = vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4_sum_r23 = vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]); + reg_32x4_sum_r45 = vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]); + reg_32x4_sum_r67 = 
vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]); + + /* r8-r15 */ + reg_32x4_sum_r8 = vpaddlq_s16(reg_16x8_mul_r8); + reg_32x4_sum_r9 = vpaddlq_s16(reg_16x8_mul_r9); + reg_32x4_sum_r10 = vpaddlq_s16(reg_16x8_mul_r10); + reg_32x4_sum_r11 = vpaddlq_s16(reg_16x8_mul_r11); + reg_32x4_sum_r12 = vpaddlq_s16(reg_16x8_mul_r12); + reg_32x4_sum_r13 = vpaddlq_s16(reg_16x8_mul_r13); + reg_32x4_sum_r14 = vpaddlq_s16(reg_16x8_mul_r14); + reg_32x4_sum_r15 = vpaddlq_s16(reg_16x8_mul_r15); + + /* r0-r7 */ + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r01, reg_32x4_sum_r23); + reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r45, reg_32x4_sum_r67); + reg_32x4_sum_r01 = vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4_sum_r45 = vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]); + + /* r8-r15 */ + reg_32x4x2_sum_r89 = vuzpq_s32(reg_32x4_sum_r8, reg_32x4_sum_r9); + reg_32x4x2_sum_r1011 = vuzpq_s32(reg_32x4_sum_r10, reg_32x4_sum_r11); + reg_32x4x2_sum_r1213 = vuzpq_s32(reg_32x4_sum_r12, reg_32x4_sum_r13); + reg_32x4x2_sum_r1415 = vuzpq_s32(reg_32x4_sum_r14, reg_32x4_sum_r15); + + reg_32x4_sum_r89 = vaddq_s32(reg_32x4x2_sum_r89.val[0], reg_32x4x2_sum_r89.val[1]); + reg_32x4_sum_r1011 = + vaddq_s32(reg_32x4x2_sum_r1011.val[0], reg_32x4x2_sum_r1011.val[1]); + reg_32x4_sum_r1213 = + vaddq_s32(reg_32x4x2_sum_r1213.val[0], reg_32x4x2_sum_r1213.val[1]); + reg_32x4_sum_r1415 = + vaddq_s32(reg_32x4x2_sum_r1415.val[0], reg_32x4x2_sum_r1415.val[1]); + + /* r0-r7 */ + reg_16x4_sum_r01_23 = vqrshrun_n_s32(reg_32x4_sum_r01, 7); + reg_16x4_sum_r45_67 = vqrshrun_n_s32(reg_32x4_sum_r45, 7); + + /* r8-r15 */ + reg_32x4x2_sum_r89 = vuzpq_s32(reg_32x4_sum_r89, reg_32x4_sum_r1011); + reg_32x4x2_sum_r1213 = vuzpq_s32(reg_32x4_sum_r1213, reg_32x4_sum_r1415); + reg_32x4_sum_r89 = vaddq_s32(reg_32x4x2_sum_r89.val[0], reg_32x4x2_sum_r89.val[1]); + reg_32x4_sum_r1213 = + vaddq_s32(reg_32x4x2_sum_r1213.val[0], reg_32x4x2_sum_r1213.val[1]); + + /* r0-r7 */ + 
reg_16x8_sum_r0_r7 = vcombine_u16(reg_16x4_sum_r01_23, reg_16x4_sum_r45_67); + reg_8x8_sum_r0_r7 = vqmovn_u16(reg_16x8_sum_r0_r7); + + reg_16x4_sum_r8_r11 = vqrshrun_n_s32(reg_32x4_sum_r89, 7); + reg_16x4_sum_r12_r15 = vqrshrun_n_s32(reg_32x4_sum_r1213, 7); + + reg_16x8_sum_r8_r15 = vcombine_u16(reg_16x4_sum_r8_r11, reg_16x4_sum_r12_r15); + reg_8x8_sum_r8_r15 = vqmovn_u16(reg_16x8_sum_r8_r15); + + reg_8x16_sum_r0_r15 = vcombine_u8(reg_8x8_sum_r0_r7, reg_8x8_sum_r8_r15); + + /* r0-r7 */ + vst1q_u8(pu1_out_pixel, reg_8x16_sum_r0_r15); + + pu1_out_pixel += 16; + pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride << 4)) >> DOWNSCALER_Q; + + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + + /* Loop for the remaining height less than 16 */ + if(u4_rem_vert_loop) + { + u4_rem_vert_loop_by_8 = u4_rem_vert_loop >> 3; + u4_rem_vert_loop = u4_rem_vert_loop % 8; + + u4_height_finished = (u4_num_iterations_vertical_by_16 << 4); + + pu1_src_j = pu1_src + ((u4_height_finished) *u4_in_stride); + pu1_dst_j = pu1_dst + u4_height_finished; + + u4_center_pixel_pos = u4_center_pixel_pos_src; + + /* 8 <= remaining height < 16 */ + if(u4_rem_vert_loop_by_8) + { + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_grid = pai1_filters[u1_phase]; + + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); + + reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid); + + for(j = u4_rem_vert_loop_by_8; j > 0; j--) + { + /******************************************************/ + /* This loop is going vertically in bottom direction */ + /* but the output pixels are stored in horizontal */ + /* direction in transpose manner */ + /******************************************************/ + + reg_8x8_src_r0 = vld1_u8(pu1_in_pixel); + reg_8x8_src_r1 = 
vld1_u8(pu1_in_pixel + u4_in_stride); + reg_8x8_src_r2 = vld1_u8(pu1_in_pixel + 2 * u4_in_stride); + reg_8x8_src_r3 = vld1_u8(pu1_in_pixel + 3 * u4_in_stride); + reg_8x8_src_r4 = vld1_u8(pu1_in_pixel + 4 * u4_in_stride); + reg_8x8_src_r5 = vld1_u8(pu1_in_pixel + 5 * u4_in_stride); + reg_8x8_src_r6 = vld1_u8(pu1_in_pixel + 6 * u4_in_stride); + reg_8x8_src_r7 = vld1_u8(pu1_in_pixel + 7 * u4_in_stride); + + reg_16x8_src_r0 = vmovl_u8(reg_8x8_src_r0); + reg_16x8_src_r1 = vmovl_u8(reg_8x8_src_r1); + reg_16x8_src_r2 = vmovl_u8(reg_8x8_src_r2); + reg_16x8_src_r3 = vmovl_u8(reg_8x8_src_r3); + reg_16x8_src_r4 = vmovl_u8(reg_8x8_src_r4); + reg_16x8_src_r5 = vmovl_u8(reg_8x8_src_r5); + reg_16x8_src_r6 = vmovl_u8(reg_8x8_src_r6); + reg_16x8_src_r7 = vmovl_u8(reg_8x8_src_r7); + reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid); + + reg_16x8_mul_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r0), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_r1 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r1), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_r2 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r2), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_r3 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r3), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_r4 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r4), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_r5 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r5), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_r6 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r6), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_r7 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r7), + reg_16x8_filt_coeff_grid); + + reg_32x4_sum_r0 = vpaddlq_s16(reg_16x8_mul_r0); + reg_32x4_sum_r1 = vpaddlq_s16(reg_16x8_mul_r1); + reg_32x4_sum_r2 = vpaddlq_s16(reg_16x8_mul_r2); + reg_32x4_sum_r3 = vpaddlq_s16(reg_16x8_mul_r3); + reg_32x4_sum_r4 = vpaddlq_s16(reg_16x8_mul_r4); + reg_32x4_sum_r5 = vpaddlq_s16(reg_16x8_mul_r5); + reg_32x4_sum_r6 = vpaddlq_s16(reg_16x8_mul_r6); + 
reg_32x4_sum_r7 = vpaddlq_s16(reg_16x8_mul_r7); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r0, reg_32x4_sum_r1); + reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_r2, reg_32x4_sum_r3); + reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r4, reg_32x4_sum_r5); + reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_r6, reg_32x4_sum_r7); + + reg_32x4_sum_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4_sum_r23 = + vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]); + reg_32x4_sum_r45 = + vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]); + reg_32x4_sum_r67 = + vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r01, reg_32x4_sum_r23); + reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r45, reg_32x4_sum_r67); + reg_32x4_sum_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4_sum_r45 = + vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]); + + reg_16x4_sum_r01_23 = vqrshrun_n_s32(reg_32x4_sum_r01, 7); + reg_16x4_sum_r45_67 = vqrshrun_n_s32(reg_32x4_sum_r45, 7); + + reg_16x8_sum_r0_r7 = vcombine_u16(reg_16x4_sum_r01_23, reg_16x4_sum_r45_67); + reg_8x8_sum_r0_r7 = vqmovn_u16(reg_16x8_sum_r0_r7); + + vst1_u8(pu1_out_pixel, reg_8x8_sum_r0_r7); + + pu1_out_pixel += 8; + pu1_in_pixel += + (u4_src_vert_increments * (u4_in_stride << 3)) >> DOWNSCALER_Q; + } + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + + /* 1 <= remaining height < 8 */ + if(u4_rem_vert_loop) + { + u4_height_finished = + ((u4_num_iterations_vertical_by_16 << 4) + (u4_rem_vert_loop_by_8 << 3)); + pu1_src_j = pu1_src + u4_height_finished * u4_in_stride; + pu1_dst_j = pu1_dst + u4_height_finished; + + u4_center_pixel_pos = u4_center_pixel_pos_src; + + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_grid = pai1_filters[u1_phase]; + + u2_full_pixel_inc = 
u4_center_pixel_pos >> DOWNSCALER_Q; + + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); + + reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid); + + for(j = u4_rem_vert_loop; j > 0; j--) + { + /******************************************************/ + /* This loop is going vertically in bottom direction */ + /* but the output pixels are stored in horizontal */ + /* direction in transpose manner */ + /******************************************************/ + + reg_8x8_src_r0 = vld1_u8(pu1_in_pixel); + reg_16x8_src_r0 = vmovl_u8(reg_8x8_src_r0); + + reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid); + + reg_16x8_mul_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r0), + reg_16x8_filt_coeff_grid); + + reg_32x4_sum_r0 = vpaddlq_s16(reg_16x8_mul_r0); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r0, reg_32x4_zero); + reg_32x4_sum_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r01, reg_32x4_zero); + reg_32x4_sum_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + + reg_16x4_sum_r01_23 = vqrshrun_n_s32(reg_32x4_sum_r01, 7); + + vst1_lane_u8(pu1_out_pixel, vreinterpret_u8_u16(reg_16x4_sum_r01_23), 0); + pu1_out_pixel += 1; + pu1_in_pixel += (u4_src_vert_increments * u4_in_stride) >> DOWNSCALER_Q; + } + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + } + } + /* for chroma */ + else + { + u4_num_iterations_vertical_by_8 = u4_blk_ht >> 3; + u4_rem_vert_loop = u4_blk_ht % 8; + + for(j = 0; j < (WORD32) u4_num_iterations_vertical_by_8; j++) + { + pu1_src_j = pu1_src + ((j << 3) * u4_in_stride); + pu1_dst_j = pu1_dst + (j << 3); + + u4_center_pixel_pos = u4_center_pixel_pos_src; + + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_grid = pai1_filters[u1_phase]; + + 
/*Doing the Calculation for current Loop Count */ + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); + + reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid); + + /******************************************************/ + /* This loop is going vertically in bottom direction */ + /* but the output pixels are stored in horizontal */ + /* direction in transpose manner */ + /******************************************************/ + + reg_8x16_src_r0 = vld1q_u8(pu1_in_pixel); + reg_8x16_src_r1 = vld1q_u8(pu1_in_pixel + u4_in_stride); + reg_8x16_src_r2 = vld1q_u8(pu1_in_pixel + 2 * u4_in_stride); + reg_8x16_src_r3 = vld1q_u8(pu1_in_pixel + 3 * u4_in_stride); + reg_8x16_src_r4 = vld1q_u8(pu1_in_pixel + 4 * u4_in_stride); + reg_8x16_src_r5 = vld1q_u8(pu1_in_pixel + 5 * u4_in_stride); + reg_8x16_src_r6 = vld1q_u8(pu1_in_pixel + 6 * u4_in_stride); + reg_8x16_src_r7 = vld1q_u8(pu1_in_pixel + 7 * u4_in_stride); + + reg_8x16x2_src_r0 = vuzpq_u8(reg_8x16_src_r0, reg_8x16_src_r1); + reg_8x16x2_src_r1 = vuzpq_u8(reg_8x16_src_r2, reg_8x16_src_r3); + reg_8x16x2_src_r2 = vuzpq_u8(reg_8x16_src_r4, reg_8x16_src_r5); + reg_8x16x2_src_r3 = vuzpq_u8(reg_8x16_src_r6, reg_8x16_src_r7); + + reg_16x8_src_cb_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[0])); + reg_16x8_src_cb_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[0])); + reg_16x8_src_cb_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[0])); + reg_16x8_src_cb_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[0])); + reg_16x8_src_cb_r4 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r2.val[0])); + reg_16x8_src_cb_r5 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r2.val[0])); + reg_16x8_src_cb_r6 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r3.val[0])); + reg_16x8_src_cb_r7 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r3.val[0])); + + reg_16x8_src_cr_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[1])); + 
reg_16x8_src_cr_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[1])); + reg_16x8_src_cr_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[1])); + reg_16x8_src_cr_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[1])); + reg_16x8_src_cr_r4 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r2.val[1])); + reg_16x8_src_cr_r5 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r2.val[1])); + reg_16x8_src_cr_r6 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r3.val[1])); + reg_16x8_src_cr_r7 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r3.val[1])); + + reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid); + + reg_16x8_mul_cb_r0 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r0), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cb_r1 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r1), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cb_r2 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r2), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cb_r3 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r3), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cb_r4 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r4), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cb_r5 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r5), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cb_r6 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r6), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cb_r7 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r7), reg_16x8_filt_coeff_grid); + + reg_16x8_mul_cr_r0 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r0), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r1 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r1), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r2 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r2), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r3 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r3), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r4 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r4), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r5 = + 
vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r5), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r6 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r6), reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r7 = + vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r7), reg_16x8_filt_coeff_grid); + + reg_32x4_sum_cb_r0 = vpaddlq_s16(reg_16x8_mul_cb_r0); + reg_32x4_sum_cb_r1 = vpaddlq_s16(reg_16x8_mul_cb_r1); + reg_32x4_sum_cb_r2 = vpaddlq_s16(reg_16x8_mul_cb_r2); + reg_32x4_sum_cb_r3 = vpaddlq_s16(reg_16x8_mul_cb_r3); + reg_32x4_sum_cb_r4 = vpaddlq_s16(reg_16x8_mul_cb_r4); + reg_32x4_sum_cb_r5 = vpaddlq_s16(reg_16x8_mul_cb_r5); + reg_32x4_sum_cb_r6 = vpaddlq_s16(reg_16x8_mul_cb_r6); + reg_32x4_sum_cb_r7 = vpaddlq_s16(reg_16x8_mul_cb_r7); + + reg_32x4_sum_cr_r0 = vpaddlq_s16(reg_16x8_mul_cr_r0); + reg_32x4_sum_cr_r1 = vpaddlq_s16(reg_16x8_mul_cr_r1); + reg_32x4_sum_cr_r2 = vpaddlq_s16(reg_16x8_mul_cr_r2); + reg_32x4_sum_cr_r3 = vpaddlq_s16(reg_16x8_mul_cr_r3); + reg_32x4_sum_cr_r4 = vpaddlq_s16(reg_16x8_mul_cr_r4); + reg_32x4_sum_cr_r5 = vpaddlq_s16(reg_16x8_mul_cr_r5); + reg_32x4_sum_cr_r6 = vpaddlq_s16(reg_16x8_mul_cr_r6); + reg_32x4_sum_cr_r7 = vpaddlq_s16(reg_16x8_mul_cr_r7); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r0, reg_32x4_sum_cb_r1); + reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cb_r2, reg_32x4_sum_cb_r3); + reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cb_r4, reg_32x4_sum_cb_r5); + reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_cb_r6, reg_32x4_sum_cb_r7); + + reg_32x4_sum_cb_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4_sum_cb_r23 = + vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]); + reg_32x4_sum_cb_r45 = + vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]); + reg_32x4_sum_cb_r67 = + vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r01, reg_32x4_sum_cb_r23); + reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cb_r45, 
reg_32x4_sum_cb_r67); + reg_32x4_sum_cb_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4_sum_cb_r45 = + vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r0, reg_32x4_sum_cr_r1); + reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cr_r2, reg_32x4_sum_cr_r3); + reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cr_r4, reg_32x4_sum_cr_r5); + reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_cr_r6, reg_32x4_sum_cr_r7); + + reg_32x4_sum_cr_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4_sum_cr_r23 = + vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]); + reg_32x4_sum_cr_r45 = + vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]); + reg_32x4_sum_cr_r67 = + vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r01, reg_32x4_sum_cr_r23); + reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cr_r45, reg_32x4_sum_cr_r67); + reg_32x4_sum_cr_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4_sum_cr_r45 = + vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]); + + reg_16x4_sum_cb_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cb_r01, 7); + reg_16x4_sum_cb_r45_67 = vqrshrun_n_s32(reg_32x4_sum_cb_r45, 7); + + reg_16x4_sum_cr_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cr_r01, 7); + reg_16x4_sum_cr_r45_67 = vqrshrun_n_s32(reg_32x4_sum_cr_r45, 7); + + reg_16x8_sum_cb_r0_r7 = + vcombine_u16(reg_16x4_sum_cb_r01_23, reg_16x4_sum_cb_r45_67); + reg_16x8_sum_cr_r0_r7 = + vcombine_u16(reg_16x4_sum_cr_r01_23, reg_16x4_sum_cr_r45_67); + + reg_8x8_sum_cb_r0_r7 = vqmovn_u16(reg_16x8_sum_cb_r0_r7); + reg_8x8_sum_cr_r0_r7 = vqmovn_u16(reg_16x8_sum_cr_r0_r7); + + vst1_u8(pu1_out_pixel, reg_8x8_sum_cb_r0_r7); + vst1_u8(pu1_out_pixel + u4_out_stride, reg_8x8_sum_cr_r0_r7); + + pu1_out_pixel += 8; + + pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride << 3)) >> 
DOWNSCALER_Q; + + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + + /* Loop for the remaining height less than 8 */ + if(u4_rem_vert_loop) + { + u4_rem_vert_loop_by_4 = u4_rem_vert_loop >> 2; + u4_rem_vert_loop = u4_rem_vert_loop % 4; + u4_height_finished = (u4_num_iterations_vertical_by_8 << 3); + pu1_src_j = pu1_src + ((u4_height_finished) *u4_in_stride); + pu1_dst_j = pu1_dst + u4_height_finished; + + u4_center_pixel_pos = u4_center_pixel_pos_src; + + /* 4<= remaining height < 8 */ + if(u4_rem_vert_loop_by_4) + { + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_grid = pai1_filters[u1_phase]; + + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); + + reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid); + + for(j = u4_rem_vert_loop_by_4; j > 0; j--) + { + /******************************************************/ + /* This loop is going vertically in bottom direction */ + /* but the output pixels are stored in horizontal */ + /* direction in transpose manner */ + /******************************************************/ + + reg_8x16_src_r0 = vld1q_u8(pu1_in_pixel); + reg_8x16_src_r1 = vld1q_u8(pu1_in_pixel + u4_in_stride); + reg_8x16_src_r2 = vld1q_u8(pu1_in_pixel + 2 * u4_in_stride); + reg_8x16_src_r3 = vld1q_u8(pu1_in_pixel + 3 * u4_in_stride); + + reg_8x16x2_src_r0 = vuzpq_u8(reg_8x16_src_r0, reg_8x16_src_r1); + reg_8x16x2_src_r1 = vuzpq_u8(reg_8x16_src_r2, reg_8x16_src_r3); + + reg_16x8_src_cb_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[0])); + reg_16x8_src_cb_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[0])); + reg_16x8_src_cb_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[0])); + reg_16x8_src_cb_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[0])); + + reg_16x8_src_cr_r0 = 
vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[1])); + reg_16x8_src_cr_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[1])); + reg_16x8_src_cr_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[1])); + reg_16x8_src_cr_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[1])); + + reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid); + + reg_16x8_mul_cb_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r0), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_cb_r1 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r1), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_cb_r2 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r2), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_cb_r3 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r3), + reg_16x8_filt_coeff_grid); + + reg_16x8_mul_cr_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r0), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r1 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r1), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r2 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r2), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r3 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r3), + reg_16x8_filt_coeff_grid); + + reg_32x4_sum_cb_r0 = vpaddlq_s16(reg_16x8_mul_cb_r0); + reg_32x4_sum_cb_r1 = vpaddlq_s16(reg_16x8_mul_cb_r1); + reg_32x4_sum_cb_r2 = vpaddlq_s16(reg_16x8_mul_cb_r2); + reg_32x4_sum_cb_r3 = vpaddlq_s16(reg_16x8_mul_cb_r3); + + reg_32x4_sum_cr_r0 = vpaddlq_s16(reg_16x8_mul_cr_r0); + reg_32x4_sum_cr_r1 = vpaddlq_s16(reg_16x8_mul_cr_r1); + reg_32x4_sum_cr_r2 = vpaddlq_s16(reg_16x8_mul_cr_r2); + reg_32x4_sum_cr_r3 = vpaddlq_s16(reg_16x8_mul_cr_r3); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r0, reg_32x4_sum_cb_r1); + reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cb_r2, reg_32x4_sum_cb_r3); + reg_32x4_sum_cb_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4_sum_cb_r23 = + vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]); + reg_32x4x2_sum_r01 = 
vuzpq_s32(reg_32x4_sum_cb_r01, reg_32x4_sum_cb_r23); + reg_32x4_sum_cb_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r0, reg_32x4_sum_cr_r1); + reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cr_r2, reg_32x4_sum_cr_r3); + reg_32x4_sum_cr_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + reg_32x4_sum_cr_r23 = + vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]); + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r01, reg_32x4_sum_cr_r23); + reg_32x4_sum_cr_r01 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + + reg_16x4_sum_cb_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cb_r01, 7); + reg_16x4_sum_cr_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cr_r01, 7); + + reg_16x8_sum_cb_cr_r0_r3 = + vcombine_u16(reg_16x4_sum_cb_r01_23, reg_16x4_sum_cr_r01_23); + reg_8x8_sum_cb_cr_r0_r3 = vmovn_u16(reg_16x8_sum_cb_cr_r0_r3); + vst1_lane_u32((uint32_t *) (pu1_out_pixel), + vreinterpret_u32_u8(reg_8x8_sum_cb_cr_r0_r3), 0); + vst1_lane_u32((uint32_t *) (pu1_out_pixel + u4_out_stride), + vreinterpret_u32_u8(reg_8x8_sum_cb_cr_r0_r3), 1); + + pu1_out_pixel += 4; + + pu1_in_pixel += + (u4_src_vert_increments * (u4_in_stride << 2)) >> DOWNSCALER_Q; + } + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + + /* 1<= remaining height < 4 */ + if(u4_rem_vert_loop) + { + u4_height_finished = + ((u4_num_iterations_vertical_by_8 << 3) + (u4_rem_vert_loop_by_4 << 2)); + pu1_src_j = pu1_src + u4_height_finished * u4_in_stride; + pu1_dst_j = pu1_dst + u4_height_finished; + + u4_center_pixel_pos = u4_center_pixel_pos_src; + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_grid = pai1_filters[u1_phase]; + + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + + pu1_out_pixel = pu1_dst_j + ((i << 
u1_is_chroma) * u4_out_stride); + + reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid); + + for(j = u4_rem_vert_loop; j > 0; j--) + { + /******************************************************/ + /* This loop is going vertically in bottom direction */ + /* but the output pixels are stored in horizontal */ + /* direction in transpose manner */ + /******************************************************/ + + reg_8x16_src_r0 = vld1q_u8(pu1_in_pixel); + + reg_8x16x2_src_r0 = vuzpq_u8(reg_8x16_src_r0, reg_8x16_src_r0); + reg_16x8_src_cb_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[0])); + reg_16x8_src_cr_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[1])); + + reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid); + + reg_16x8_mul_cb_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r0), + reg_16x8_filt_coeff_grid); + reg_16x8_mul_cr_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r0), + reg_16x8_filt_coeff_grid); + + reg_32x4_sum_cb_r0 = vpaddlq_s16(reg_16x8_mul_cb_r0); + reg_32x4_sum_cr_r0 = vpaddlq_s16(reg_16x8_mul_cr_r0); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r0, reg_32x4_sum_cr_r0); + reg_32x4_sum_cb_cr_r0 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + + reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_cr_r0, reg_32x4_zero); + reg_32x4_sum_cb_cr_r0 = + vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]); + + reg_16x4_sum_cb_cr_r0 = vqrshrun_n_s32(reg_32x4_sum_cb_cr_r0, 7); + vst1_lane_u8((pu1_out_pixel), vreinterpret_u8_u16(reg_16x4_sum_cb_cr_r0), + 0); + vst1_lane_u8((pu1_out_pixel + u4_out_stride), + vreinterpret_u8_u16(reg_16x4_sum_cb_cr_r0), 2); + + pu1_out_pixel += 1; + + pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride)) >> DOWNSCALER_Q; + } + + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + } + } +} diff --git a/encoder/arm/svc/isvce_function_selector.c b/encoder/arm/svc/isvce_function_selector.c new file mode 100644 index 
0000000..f1dc6f3 --- /dev/null +++ b/encoder/arm/svc/isvce_function_selector.c @@ -0,0 +1,157 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in h264 +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include Files */ +#include +#include +#include +#include + +/* User Include Files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvc_defs.h" +#include "ih264_size_defs.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include 
"isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "isvc_macros.h" +#include "ih264_platform_macros.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "isvce_cabac.h" +#include "ih264e_platform_macros.h" +#include "isvce_platform_macros.h" + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine first calls +* isvce_init_function_ptr_generic() on the codec context and then, depending +* on ps_codec->s_cfg.e_arch and on the ARMV8 / DISABLE_NEON build macros, +* calls isvce_init_function_ptr_neon_av8() or +* isvce_init_function_ptr_neon_a9q() on the same context +* +* @param[in] pv_codec +* Codec context pointer; it is cast to isvce_codec_t * inside the routine +* +* @returns none +* +* @remarks When ARMV8 is defined, or when neither ARMV8 nor DISABLE_NEON is +* defined, the 'default' label belongs to the NEON group, so every e_arch +* value other than ARCH_X86_GENERIC reaches the NEON selector. When only +* DISABLE_NEON is defined, 'default' shares the empty ARCH_X86_GENERIC arm and +* no selector other than the generic one is called +* +******************************************************************************* +*/ +void isvce_init_function_ptr(void *pv_codec) +{ + isvce_codec_t *ps_codec = (isvce_codec_t *) pv_codec; + isvce_init_function_ptr_generic(ps_codec); /* always called first, whatever e_arch is */ + switch(ps_codec->s_cfg.e_arch) + { +#if defined(ARMV8) + case ARCH_ARM_A53: + case ARCH_ARM_A57: + case ARCH_ARM_V8_NEON: + default: + isvce_init_function_ptr_neon_av8(ps_codec); + break; +#elif !defined(DISABLE_NEON) + case ARCH_ARM_A9Q: + case ARCH_ARM_A9A: + case ARCH_ARM_A9: + case ARCH_ARM_A7: + case ARCH_ARM_A5: + case ARCH_ARM_A15: + default: + isvce_init_function_ptr_neon_a9q(ps_codec); + break; +#else + default: /* NEON disabled: shares the empty ARCH_X86_GENERIC arm below */ +#endif + case ARCH_X86_GENERIC: /* no selector call beyond the generic one above */ + break; + } +} + +/** +******************************************************************************* +* +* @brief Determine the architecture of the encoder executing environment +* +* @par Description: This routine returns the architecture of the +* environment in which the current encoder is being tested +* +* @param[in] void
+* +* @returns IV_ARCH_T +* architecture +* +* @remarks none +* +******************************************************************************* +*/ +IV_ARCH_T isvce_default_arch(void) +{ /* the value is fixed at compile time by the ARMV8 / DISABLE_NEON macros; nothing is probed at run time */ +#if defined(ARMV8) + return ARCH_ARM_V8_NEON; /* ARMV8 defined */ +#elif !defined(DISABLE_NEON) + return ARCH_ARM_A9Q; /* ARMV8 not defined and NEON not disabled */ +#else + return ARCH_GENERIC; /* DISABLE_NEON defined without ARMV8 */ +#endif +} diff --git a/encoder/arm/svc/isvce_function_selector_a9q.c b/encoder/arm/svc/isvce_function_selector_a9q.c new file mode 100644 index 0000000..b5f8ba4 --- /dev/null +++ b/encoder/arm/svc/isvce_function_selector_a9q.c @@ -0,0 +1,270 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvce_function_selector_a9q.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - isvce_init_function_ptr_neon_a9q +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include +#include +#include +#include + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvc_defs.h" +#include "ih264_size_defs.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "ih264e_platform_macros.h" +#include "isvce_cabac.h" +#include "isvce_core_coding.h" +#include "ih264_cavlc_tables.h" +#include "isvce_cavlc.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_half_pel.h" + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the
function pointers of +* codec context for the neon_a9q selector. Most assigned functions carry an +* _a9q, _a9 or _neon suffix; the 8x8 transforms, bottom padding, core coding +* and mb syntax writers are assigned functions without such a suffix +* +* @param[in] ps_codec +* Codec context pointer. Its function pointer members, the tables inside +* s_isa_dependent_fxns and the s_me_ctxt of each of the MAX_PROCESS_CTXT +* entries of as_process are written +* +* @returns none +* +* @remarks Only the entries listed in the body are written. Entries such as +* apf_intra_pred_8_l[1], apf_luma_energy_compaction[2] and +* pf_write_mb_syntax_layer[CABAC][BSLICE] are not assigned here and keep +* whatever value they had on entry +* +******************************************************************************* +*/ +void isvce_init_function_ptr_neon_a9q(isvce_codec_t *ps_codec) +{ + WORD32 i = 0; + + /* curr proc ctxt */ + isvce_process_ctxt_t *ps_proc = NULL; + isvce_me_ctxt_t *ps_me_ctxt = NULL; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns; + inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns; + mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q; + ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q; + ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q; + ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q; + ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q; + ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q; + ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q; + ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q; + ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q; + ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q; + ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q; + ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q; + + /* Init function pointers for intra pred leaf
level functions luma + * Intra 8x8; index 1 is not assigned in this routine */ + ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q; + ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q; + ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q; + ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q; + ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q; + ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q; + ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q; + ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q; + ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q; + ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q; + ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q; + + /* Init forward transform fn ptr; both 8x8 entries get the unsuffixed + * isvc_resi_trans_quant_8x8 */ + ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8; + ps_enc_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8; + ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4_neon; + ps_enc_loop_fxns->apf_resi_trans_quant_4x4[1] = + isvc_resi_trans_quant_4x4_with_residual_sub_neon; + ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4_neon; + ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] = + isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon; + + /* Init inverse transform fn ptr; all three 8x8 entries get the unsuffixed + * isvc_iquant_itrans_recon_8x8 */ + ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8; + ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8; + ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8; + + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] = +
isvc_iquant_itrans_recon_4x4_with_res_output_neon; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] = + isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_neon; + + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] = + isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] = + isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc_neon; + + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] = + isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] = + isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] = + isvc_iquant_itrans_recon_chroma_4x4_neon; + + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] = + isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] = + isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] = + isvc_iquant_itrans_recon_chroma_4x4_dc_neon; + + ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9; + ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9; + + /* Init fn ptr luma core coding; index 2 is not assigned in this routine */ + ps_enc_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16; + ps_enc_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4; + ps_enc_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16; + + /* Init fn ptr chroma core coding */ + ps_enc_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8; + ps_enc_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;
+ + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9; + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9; + + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9; + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9; + + /* write mb syntax layer; [CABAC][BSLICE] is not assigned in this routine */ + ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac; + ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac; + + /* Padding Functions; pf_pad_bottom gets the unsuffixed ih264_pad_bottom */ + ps_codec->pf_pad_top = ih264_pad_top_a9q; + ps_codec->pf_pad_bottom = ih264_pad_bottom; + ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q; + + /* Inter pred leaf level functions */ + ps_inter_pred_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q; + ps_inter_pred_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q; + ps_inter_pred_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q; + ps_inter_pred_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q; + ps_inter_pred_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q; + + /* sad me level functions, codec-level pointers */ + ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q; +
ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q; + ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q; + + /* memory handling operations */ + ps_mem_fxns->pf_mem_cpy = ih264_memcpy_a9q; + ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q; + ps_mem_fxns->pf_mem_set = ih264_memset_a9q; + ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8_a9q; + + /* sad me level functions, repeated for the me context of every process + * context */ + for(i = 0; i < (MAX_PROCESS_CTXT); i++) + { + ps_proc = &ps_codec->as_process[i]; + ps_me_ctxt = &ps_proc->s_me_ctxt; + ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q; + ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q; + ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q; + ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q; + ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q; + ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q; + ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q; + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q; + } + + /* intra mode eval -encoder level function */ + ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q; + ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q; + ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q; +} diff --git a/encoder/arm/svc/isvce_function_selector_av8.c b/encoder/arm/svc/isvce_function_selector_av8.c new file mode 100644 index 0000000..16c08bb --- /dev/null +++ b/encoder/arm/svc/isvce_function_selector_av8.c @@ -0,0 +1,278 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvce_function_selector_av8.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - isvce_init_function_ptr_neon_av8 +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include +#include +#include +#include + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvc_defs.h" +#include "ih264_size_defs.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include
"isvce_rate_control.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_structs.h"
+#include "ih264e_platform_macros.h"
+#include "isvce_cabac.h"
+#include "isvce_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "isvce_cavlc.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context for the 'neon_av8' variant
+*
+* @par Description: the routine only performs assignments. It fills the
+* function pointer members of ps_codec, of the three function sets reached
+* through ps_codec->s_isa_dependent_fxns (encode loop, inter prediction,
+* memory) and of the ME context of each of the MAX_PROCESS_CTXT entries of
+* ps_codec->as_process. Most slots receive routines whose names end in
+* '_av8' or '_neon'; the remaining slots receive routines without such a
+* suffix (8x8 forward/inverse transform, core coding, mb syntax writers,
+* bottom padding, luma bilinear inter prediction, intra 4x4 mode evaluation
+* and colour space conversion). None of the installed pointers is called
+* here.
+*
+* @param[in,out] ps_codec
+* Codec context pointer. It is dereferenced without a NULL check and its
+* function pointer members are overwritten
+*
+* @returns none
+*
+* @remarks Three table slots are not written by this routine; see the
+* NOTE(review) remarks next to apf_intra_pred_8_l,
+* apf_luma_energy_compaction and pf_write_mb_syntax_layer below
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_neon_av8(isvce_codec_t *ps_codec)
+{
+    WORD32 i = 0;
+
+    /* curr proc ctxt and shortcuts to the function sets stored inside
+     * ps_codec->s_isa_dependent_fxns */
+    isvce_process_ctxt_t *ps_proc = NULL;
+    isvce_me_ctxt_t *ps_me_ctxt = NULL;
+    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
+    enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
+    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
+    mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 16x16 (slots 0..3: vert, horz, dc, plane) */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_av8;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_av8;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_av8;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_av8;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 4x4 (slots 0..8, all assigned) */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_av8;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_av8;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_av8;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_av8;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_av8;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_av8;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_av8;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_av8;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_av8;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 8x8
+     * NOTE(review): slot [1] is not assigned here, whereas slots [0] and
+     * [2]..[8] are, and the 16x16 and 4x4 tables above have no such gap.
+     * Confirm whether the omission is intentional. */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_av8;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_av8;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_av8;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_av8;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_av8;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_av8;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_av8;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_av8;
+
+    /* Init function pointers for intra pred leaf level functions chroma
+     * Intra 8x8 (slot order differs from luma: dc, horz, vert, plane) */
+    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_av8;
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_av8;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_av8;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_av8;
+
+    /* Init forward transform fn ptr
+     * 8x8: both slots share one routine, which carries no '_neon' suffix
+     * 4x4 and chroma 4x4: slot [0] is the plain variant, slot [1] the
+     * 'with_residual_sub' variant */
+    ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8;
+    ps_enc_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8;
+    ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4_neon;
+    ps_enc_loop_fxns->apf_resi_trans_quant_4x4[1] =
+        isvc_resi_trans_quant_4x4_with_residual_sub_neon;
+    ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4_neon;
+    ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] =
+        isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;
+
+    /* Init inverse transform fn ptr
+     * 8x8: all three slots share one routine, which carries no '_neon' suffix
+     * every 4x4 table below: slot [0] is the 'with_res_output' variant,
+     * slot [1] the 'with_res_accumulate' variant, slot [2] the plain variant */
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;
+
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] =
+        isvc_iquant_itrans_recon_4x4_with_res_output_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] =
+        isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_neon;
+
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] =
+        isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] =
+        isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc_neon;
+
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] =
+        isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] =
+        isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] =
+        isvc_iquant_itrans_recon_chroma_4x4_neon;
+
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
+
+    ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_av8;
+    ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_av8;
+
+    /* Init fn ptr luma core coding
+     * NOTE(review): slot [2] is not assigned here (only [0], [1] and [3]
+     * are); confirm that it is never dispatched through */
+    ps_enc_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16;
+    ps_enc_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4;
+    ps_enc_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16;
+
+    /* Init fn ptr chroma core coding (slot 0: intra, slot 1: inter) */
+    ps_enc_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8;
+    ps_enc_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;
+
+    /* Init fn ptr luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_av8;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_av8;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_av8;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_av8;
+
+    /* Init fn ptr chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_av8;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_av8;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_av8;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8;
+
+    /* write mb syntax layer, indexed by [entropy coding mode][slice type]
+     * NOTE(review): [CABAC][BSLICE] is not assigned here, while
+     * [CAVLC][BSLICE] is; confirm whether the omission is intentional */
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac;
+    ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac;
+
+    /* Padding Functions (the bottom padding routine is the only one without
+     * an '_av8' suffix) */
+    ps_codec->pf_pad_top = ih264_pad_top_av8;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_av8;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_av8;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_av8;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_av8;
+
+    /* Inter pred leaf level functions (the luma bilinear routine is the only
+     * one without an '_av8' suffix) */
+    ps_inter_pred_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_av8;
+    ps_inter_pred_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_av8;
+    ps_inter_pred_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_av8;
+    ps_inter_pred_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
+    ps_inter_pred_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma_av8;
+
+    /* sad me level functions stored directly in the codec context */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_av8;
+
+    /* memory handling operations */
+    ps_mem_fxns->pf_mem_cpy = ih264_memcpy_av8;
+    ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_av8;
+    ps_mem_fxns->pf_mem_set = ih264_memset_av8;
+    ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8_av8;
+
+    /* sad me level functions stored in the ME context of every process
+     * context; the same set of routines is installed in each of them */
+    for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+    {
+        ps_proc = &ps_codec->as_process[i];
+        ps_me_ctxt = &ps_proc->s_me_ctxt;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
+        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_av8;
+        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_av8;
+        ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_av8;
+        ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_av8;
+        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_av8;
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_av8;
+    }
+
+    /* intra mode eval - encoder level functions (the 4x4 evaluator is the
+     * only one without an '_av8' suffix) */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_av8;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_av8;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
+
+    /* csc (colour space conversion); both routines carry no '_av8' suffix */
+    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
+    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
+
+    /* Half pel generation function - encoder level */
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_av8;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_av8;
+}
diff --git a/encoder/arm/svc/isvce_platform_macros.h b/encoder/arm/svc/isvce_platform_macros.h
new file mode 100644
index 0000000..df18315
--- /dev/null
+++ b/encoder/arm/svc/isvce_platform_macros.h
@@ -0,0 +1,139 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvce_platform_macros.h
+*
+* @brief
+*  Declares the platform specific routines used for codec context
+*  initialization
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  This header includes no other header. The types isvce_codec_t and
+*  IV_ARCH_T used by the prototypes below must therefore be declared before
+*  this header is included
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_PLATFORM_MACROS_H_
+#define _ISVCE_PLATFORM_MACROS_H_
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context ('neon_a9q' variant)
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use. The definition is not
+* part of this header; presumably it installs the routines built for the
+* a9q target (to be confirmed against the definition)
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_neon_a9q(isvce_codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context ('neon_av8' variant)
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use. Its definition in
+* isvce_function_selector_av8.c installs routines whose names end in '_av8'
+* or '_neon', plus a few routines without such a suffix
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_neon_av8(isvce_codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context ('generic' variant)
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use. The definition is not
+* part of this header; presumably it installs the routines that are not
+* specific to an instruction set (to be confirmed against the definition)
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use. Unlike the variants
+* declared above it takes the codec context as an untyped pointer;
+* presumably it selects one of those variants (to be confirmed against the
+* definition)
+*
+* @param[in] pv_codec
+* Codec context pointer, passed as void *
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T isvce_default_arch(void);
+
+#endif
diff --git a/encoder/arm/svc/isvce_rc_utils_neon.c b/encoder/arm/svc/isvce_rc_utils_neon.c
new file mode 100644
index 0000000..6ae04bd
--- /dev/null
+++ b/encoder/arm/svc/isvce_rc_utils_neon.c
@@ -0,0 +1,625 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file isvce_rc_utils_neon.c +* +* @brief +* This file contains the NEON SIMD version of the function which computes +* gradient per pixel value being used in Init Qp +* +* @author +* Ittiam +* +* @par List of Functions: +* - isvce_get_gpp_neon() +* +* @remarks +* None +* +******************************************************************************* +*/ + +#include <arm_neon.h> + +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "isvc_structs.h" +#include "isvce_rc_utils_private_defs.h" + +/** +******************************************************************************* +* +* @brief +* get gpp function +* +* @par Description: +* computes gradient per pixel value for a given frame +* +* @param[in] ps_input_buf +* pointer to yuv buffer properties +* +* @returns +* calculated gpp value +* +* @remarks +* none +* +******************************************************************************* +*/ + +DOUBLE isvce_get_gpp_neon(yuv_buf_props_t *ps_input_buf) +{ + UWORD8 *pu1_input_buf; + UWORD32 i, j, k; + UWORD32 u4_width, u4_height, i4_input_stride; + DOUBLE d_gpp_y, d_gpp_u, d_gpp_v, d_gpp; + + uint8x8_t reg_8x8_src_r0, reg_8x8_src_r1, reg_8x8_src_r2, reg_8x8_src_r3, reg_8x8_src_r4, + reg_8x8_src_r5, reg_8x8_src_r6, reg_8x8_src_r7, reg_8x8_src_r8; + uint8x8_t reg_8x8_src_right_r0, 
reg_8x8_src_right_r1, reg_8x8_src_right_r2, + reg_8x8_src_right_r3, reg_8x8_src_right_r4, reg_8x8_src_right_r5, reg_8x8_src_right_r6, + reg_8x8_src_right_r7; + uint16x8_t reg_16x8_abs_diff_y, reg_16x8_abs_diff_uv; + uint64x2_t reg_64x2_gpp_y, reg_64x2_gpp_uv; + + uint8x8_t reg_8x8_shuffle = {0, 2, 4, 6, 1, 3, 5, 7}; + uint16x8_t reg_16x8_and_mask_y = {0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0x0000}; + uint16x8_t reg_16x8_and_mask_uv = {0xffff, 0xffff, 0xffff, 0x0000, + 0xffff, 0xffff, 0xffff, 0x0000}; + uint32x4_t reg_32x4_abs_diff_hadd_y = vdupq_n_u32(0); + uint32x4_t reg_32x4_abs_diff_hadd_uv = vdupq_n_u32(0); + + d_gpp_y = 0; + d_gpp_u = 0; + d_gpp_v = 0; + d_gpp = 0; + pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[0].pv_data; + i4_input_stride = ps_input_buf->as_component_bufs[0].i4_data_stride; + u4_width = ps_input_buf->u4_width; + u4_height = ps_input_buf->u4_height; + + ASSERT((u4_width % 8) == 0); + + /***********************************************************/ + /* For Luma - */ + /* This code block calculates gpp value for luma by adding */ + /* the absolute difference between the current pixel and */ + /* it's immediate right pixel with the absolute difference */ + /* between the current pixel and it's immediate bottom */ + /* pixel and accumulating for every pixel in the frame. */ + /***********************************************************/ + /* -8 in the checks below since right column and bottow row being used for gradients, */ + /* and last row and column are ignored for gradient computation. 
*/ + /* Note that input is not required to be padded */ + for(i = 0; i < u4_height - 8; i += 8) + { + for(j = 0; j < u4_width - 8; j += 8) + { + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j); + reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j); + reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j); + reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j); + reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j); + reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j); + reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j); + + reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 1); + reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 1); + reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 1); + reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 1); + reg_8x8_src_right_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j + 1); + reg_8x8_src_right_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j + 1); + reg_8x8_src_right_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j + 1); + reg_8x8_src_right_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j + 1); + + reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_r5); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_r6); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_r7); + reg_16x8_abs_diff_y = 
vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_r8); + + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_right_r4); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_right_r5); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_right_r6); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_right_r7); + + reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y); + } + + /************************************************************/ + /* Remaining width - */ + /* Since Last pixel is not getting processed, remaining 7 */ + /* pixels are getting processed separately by performing */ + /* and operations with reg_16x8_and_mask_y */ + /************************************************************/ + ASSERT((u4_width - j) == 8); + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j); + reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j); + reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j); + reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j); + reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j); + reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j); + reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j); + + reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 1); + reg_8x8_src_right_r1 = 
vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 1); + reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 1); + reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 1); + reg_8x8_src_right_r4 = vext_u8(reg_8x8_src_r4, reg_8x8_src_r4, 1); + reg_8x8_src_right_r5 = vext_u8(reg_8x8_src_r5, reg_8x8_src_r5, 1); + reg_8x8_src_right_r6 = vext_u8(reg_8x8_src_r6, reg_8x8_src_r6, 1); + reg_8x8_src_right_r7 = vext_u8(reg_8x8_src_r7, reg_8x8_src_r7, 1); + + reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_r5); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_r6); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_r7); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_r8); + + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_right_r4); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_right_r5); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_right_r6); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_right_r7); + + reg_16x8_abs_diff_y = vandq_u16(reg_16x8_abs_diff_y, reg_16x8_and_mask_y); + + reg_32x4_abs_diff_hadd_y = 
vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y); + + pu1_input_buf += (i4_input_stride * 8); + } + + /* Loop for remaining height less than 8 */ + /* 4 <= remaining_height < 8 */ + for(k = i; k < u4_height - 4; k += 4, i += 4) + { + for(j = 0; j < u4_width - 8; j += 8) + { + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j); + reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j); + reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j); + reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 1); + reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 1); + reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 1); + reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 1); + + reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4); + + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3); + + reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y); + } + + /************************************************************/ + /* Remaining width - */ + /* Since Last pixel is not getting processed, remaining 7 */ + /* pixels are getting processed separately by performing */ + /* and operations with reg_16x8_and_mask_y */ + 
/************************************************************/ + ASSERT((u4_width - j) == 8); + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j); + reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j); + reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j); + + reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 1); + reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 1); + reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 1); + reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 1); + + reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4); + + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3); + + reg_16x8_abs_diff_y = vandq_u16(reg_16x8_abs_diff_y, reg_16x8_and_mask_y); + + reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y); + + pu1_input_buf += (i4_input_stride * 4); + } + + /* Loop for remaining height less than 4 */ + /* 0 <= remaining_height < 4 */ + for(k = i; k < u4_height - 1; k++) + { + for(j = 0; j < u4_width - 8; j += 8) + { + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 1); + + reg_16x8_abs_diff_y = 
vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_y = + vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0); + + reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y); + } + + /************************************************************/ + /* Remaining width - */ + /* Since Last pixel is not getting processed, remaining 7 */ + /* pixels are getting processed separately by performing */ + /* and operations with reg_16x8_and_mask_y */ + /************************************************************/ + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 1); + + reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0); + + reg_16x8_abs_diff_y = vandq_u16(reg_16x8_abs_diff_y, reg_16x8_and_mask_y); + + reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y); + + pu1_input_buf += i4_input_stride; + } + + /* Pairwise add reg_32x4_abs_diff_hadd_y to get final gpp value */ + reg_64x2_gpp_y = vpaddlq_u32(reg_32x4_abs_diff_hadd_y); + d_gpp_y = vgetq_lane_u64(reg_64x2_gpp_y, 0); + d_gpp_y += vgetq_lane_u64(reg_64x2_gpp_y, 1); + + pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[1].pv_data; + i4_input_stride = ps_input_buf->as_component_bufs[1].i4_data_stride; + + /***************************************************************/ + /* For Chroma - */ + /* This code block first deinterleaves the Cb and Cr values, */ + /* calculates gpp value for both Cb and Cr separately by */ + /* adding the absolute difference between the current pixel */ + /* and it's immediate right pixel with the absolute */ + /* difference between the current pixel and it's immediate */ + /* bottom pixel and accumulating for every pixel in the frame. 
*/ + /***************************************************************/ + for(i = 0; i < (u4_height >> 1) - 8; i += 8) + { + for(j = 0; j < u4_width - 8; j += 8) + { + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j); + reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j); + reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j); + reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j); + reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j); + reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j); + reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j); + + reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 2); + reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 2); + reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 2); + reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 2); + reg_8x8_src_right_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j + 2); + reg_8x8_src_right_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j + 2); + reg_8x8_src_right_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j + 2); + reg_8x8_src_right_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j + 2); + + /* separating u and v */ + reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle); + reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle); + reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle); + reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle); + reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle); + reg_8x8_src_r5 = vtbl1_u8(reg_8x8_src_r5, reg_8x8_shuffle); + reg_8x8_src_r6 = vtbl1_u8(reg_8x8_src_r6, reg_8x8_shuffle); + reg_8x8_src_r7 = vtbl1_u8(reg_8x8_src_r7, reg_8x8_shuffle); + reg_8x8_src_r8 = vtbl1_u8(reg_8x8_src_r8, reg_8x8_shuffle); + 
reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle); + reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle); + reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle); + reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle); + reg_8x8_src_right_r4 = vtbl1_u8(reg_8x8_src_right_r4, reg_8x8_shuffle); + reg_8x8_src_right_r5 = vtbl1_u8(reg_8x8_src_right_r5, reg_8x8_shuffle); + reg_8x8_src_right_r6 = vtbl1_u8(reg_8x8_src_right_r6, reg_8x8_shuffle); + reg_8x8_src_right_r7 = vtbl1_u8(reg_8x8_src_right_r7, reg_8x8_shuffle); + + reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_r5); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_r6); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_r7); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_r8); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_right_r4); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_right_r5); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_right_r6); + reg_16x8_abs_diff_uv = + 
vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_right_r7); + + reg_32x4_abs_diff_hadd_uv = + vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv); + } + + /************************************************************/ + /* Remaining width - */ + /* Since Last pixel is not getting processed, remaining 6 */ + /* pixels are getting processed separately by performing */ + /* and operations with reg_16x8_and_mask_uv */ + /************************************************************/ + ASSERT((u4_width - j) == 8); + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j); + reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j); + reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j); + reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j); + reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j); + reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j); + reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j); + reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 2); + reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 2); + reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 2); + reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 2); + reg_8x8_src_right_r4 = vext_u8(reg_8x8_src_r4, reg_8x8_src_r4, 2); + reg_8x8_src_right_r5 = vext_u8(reg_8x8_src_r5, reg_8x8_src_r5, 2); + reg_8x8_src_right_r6 = vext_u8(reg_8x8_src_r6, reg_8x8_src_r6, 2); + reg_8x8_src_right_r7 = vext_u8(reg_8x8_src_r7, reg_8x8_src_r7, 2); + + /* separating u and v */ + reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle); + reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle); + reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle); + reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle); + reg_8x8_src_r4 = 
vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle); + reg_8x8_src_r5 = vtbl1_u8(reg_8x8_src_r5, reg_8x8_shuffle); + reg_8x8_src_r6 = vtbl1_u8(reg_8x8_src_r6, reg_8x8_shuffle); + reg_8x8_src_r7 = vtbl1_u8(reg_8x8_src_r7, reg_8x8_shuffle); + reg_8x8_src_r8 = vtbl1_u8(reg_8x8_src_r8, reg_8x8_shuffle); + reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle); + reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle); + reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle); + reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle); + reg_8x8_src_right_r4 = vtbl1_u8(reg_8x8_src_right_r4, reg_8x8_shuffle); + reg_8x8_src_right_r5 = vtbl1_u8(reg_8x8_src_right_r5, reg_8x8_shuffle); + reg_8x8_src_right_r6 = vtbl1_u8(reg_8x8_src_right_r6, reg_8x8_shuffle); + reg_8x8_src_right_r7 = vtbl1_u8(reg_8x8_src_right_r7, reg_8x8_shuffle); + + reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_r5); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_r6); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_r7); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_r8); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3); + reg_16x8_abs_diff_uv = 
vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_right_r4); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_right_r5); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_right_r6); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_right_r7); + + reg_16x8_abs_diff_uv = vandq_u16(reg_16x8_abs_diff_uv, reg_16x8_and_mask_uv); + + reg_32x4_abs_diff_hadd_uv = vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv); + + pu1_input_buf += (i4_input_stride * 8); + } + + /* Loop for remaining height less than 8 */ + /* 4 <= remaining_height < 8 */ + for(k = i; k < (u4_height >> 1) - 4; k += 4, i += 4) + { + for(j = 0; j < u4_width - 8; j += 8) + { + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j); + reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j); + reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j); + reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 2); + reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 2); + reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 2); + reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 2); + + /* separating u and v */ + reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle); + reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle); + reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle); + reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle); + reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle); + reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle); + reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle); + reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle); + reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, 
reg_8x8_shuffle); + + reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2); + reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3); + + reg_32x4_abs_diff_hadd_uv = + vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv); + } + + /************************************************************/ + /* Remaining width - */ + /* Since Last pixel is not getting processed, remaining 6 */ + /* pixels are getting processed separately by performing */ + /* and operations with reg_16x8_and_mask_uv */ + /************************************************************/ + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j); + reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j); + reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j); + reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 2); + reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 2); + reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 2); + reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 2); + + /* separating u and v */ + reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle); + reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle); + reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle); + 
reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle); + reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle); + reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle); + reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle); + reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle); + reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle); + + reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3); + + reg_16x8_abs_diff_uv = vandq_u16(reg_16x8_abs_diff_uv, reg_16x8_and_mask_uv); + + reg_32x4_abs_diff_hadd_uv = vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv); + + pu1_input_buf += (i4_input_stride * 4); + } + + /* Loop for remaining height less than 4 */ + /* 0 <= remaining_height < 4 */ + for(k = i; k < (u4_height >> 1) - 1; k++) + { + for(j = 0; j < u4_width - 8; j += 8) + { + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 2); + + /* separating u and v */ + reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle); + reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle); + reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle); + + reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + 
reg_16x8_abs_diff_uv = + vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0); + + reg_32x4_abs_diff_hadd_uv = + vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv); + } + + /************************************************************/ + /* Remaining width - */ + /* Since Last pixel is not getting processed, remaining 6 */ + /* pixels are getting processed separately by performing */ + /* and operations with reg_16x8_and_mask_uv */ + /************************************************************/ + reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j); + reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j); + reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 2); + + /* separating u and v */ + reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle); + reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle); + reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle); + + reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1); + reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0); + + reg_16x8_abs_diff_uv = vandq_u16(reg_16x8_abs_diff_uv, reg_16x8_and_mask_uv); + + reg_32x4_abs_diff_hadd_uv = vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv); + + pu1_input_buf += i4_input_stride; + } + + /* Pairwise add u4_abd_hadd_uv to get final gpp_u and gpp_v value */ + reg_64x2_gpp_uv = vpaddlq_u32(reg_32x4_abs_diff_hadd_uv); + d_gpp_u = vgetq_lane_u64(reg_64x2_gpp_uv, 0); + d_gpp_v = vgetq_lane_u64(reg_64x2_gpp_uv, 1); + + d_gpp_y /= (u4_width * u4_height); + d_gpp_u /= ((u4_width / 2) * (u4_height / 2)); + d_gpp_v /= ((u4_width / 2) * (u4_height / 2)); + + d_gpp = (DOUBLE) ((WT_LUMA_GPP * d_gpp_y) + d_gpp_u + d_gpp_v) / WT_TOTAL_GPP; + + return d_gpp; +} diff --git a/encoder/arm/svc/isvce_residual_pred_neon.c b/encoder/arm/svc/isvce_residual_pred_neon.c new file mode 100644 index 0000000..37065f5 --- /dev/null +++ b/encoder/arm/svc/isvce_residual_pred_neon.c @@ 
-0,0 +1,666 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+*
+* @file
+*  isvce_residual_pred_neon.c
+*
+* @brief
+*  NEON kernels for SVC residual prediction:
+*  2x upsampling of a luma residual block, and
+*  SAD of (source - prediction - residual prediction)
+*
+*******************************************************************************
+*/
+#include <arm_neon.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_size_defs.h"
+#include "isvc_macros.h"
+#include "isvc_structs.h"
+
+void isvce_luma_residual_sampler_2x_neon(coordinates_t *ps_ref_array_positions,
+                                         coordinates_t *ps_ref_array_phases,
+                                         buffer_container_t *ps_inp, buffer_container_t *ps_out,
+                                         buffer_container_t *ps_scratch, UWORD32 u4_ref_nnz,
+                                         UWORD8 u1_ref_tx_size)
+{
+    WORD16 *pi2_inp_data = (WORD16 *) ps_inp->pv_data;
+    WORD16 *pi2_out_res = (WORD16 *) ps_out->pv_data;
+    WORD32 i4_inp_data_stride = ps_inp->i4_data_stride;
+    WORD32 i4_out_res_stride = ps_out->i4_data_stride;
+    WORD16 *pi2_refarray_buffer = (WORD16 *) ps_scratch->pv_data;
+    WORD32 i4_blk_ctr;
+
+    UNUSED(ps_ref_array_positions);
+    UNUSED(ps_ref_array_phases);
+
+    /* For 2x scaling, offsets always point to TL pixel outside MB */
+    /* Hence, refTransBlkIdc will be different and since phase */
+    /* for first refArray pos for horiz filtering samples > 8, */
+    /* first row and first column from the refArray is never used */
+    pi2_inp_data += 1 + i4_inp_data_stride;
+
+    if((u1_ref_tx_size) && (0 != u4_ref_nnz))
+    {
+        WORD16 *pi2_ref_data_byte;
+        WORD32 *pi4_ref_array;
+        WORD32 i4_i, i4_j;
+
+        /* Registers for the horizontal pass; two input rows per iteration */
+        int16x8_t i2_coeff_add_16x8_r0;
+        int16x8_t i2_coeff_16x8_r0_0, i2_coeff_16x8_r0_1;
+        int16x8_t i2_coeff_16x8_sl_r0_0, i2_coeff_16x8_sl_r0_1;
+        int16x8_t result_16x8_r0_0, result_16x8_r0_1;
+
+        int16x8_t i2_coeff_add_16x8_r1;
+        int16x8_t i2_coeff_16x8_r1_0, i2_coeff_16x8_r1_1;
+        int16x8_t i2_coeff_16x8_sl_r1_0, i2_coeff_16x8_sl_r1_1;
+        int16x8_t result_16x8_r1_0, result_16x8_r1_1;
+        int16x8x2_t final_result_16x8x2_r0, final_result_16x8x2_r1;
+
+        pi2_ref_data_byte = pi2_inp_data;
+
+        /* Horizontal pass: 8 input rows -> 8 rows of 16 WORD32 in scratch */
+        pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
+
+        for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i += 2)
+        {
+            i2_coeff_16x8_r0_0 = vld1q_s16(pi2_ref_data_byte);
+            i2_coeff_16x8_r0_1 = vld1q_s16((pi2_ref_data_byte + 1));
+
+            i2_coeff_16x8_r1_0 = vld1q_s16(pi2_ref_data_byte + i4_inp_data_stride);
+            i2_coeff_16x8_r1_1 = vld1q_s16((pi2_ref_data_byte + i4_inp_data_stride + 1));
+
+            i2_coeff_add_16x8_r0 = vaddq_s16(i2_coeff_16x8_r0_0, i2_coeff_16x8_r0_1);
+            i2_coeff_16x8_sl_r0_0 = vshlq_n_s16(i2_coeff_16x8_r0_0, 1);
+            i2_coeff_16x8_sl_r0_1 = vshlq_n_s16(i2_coeff_16x8_r0_1, 1);
+
+            i2_coeff_add_16x8_r1 = vaddq_s16(i2_coeff_16x8_r1_0, i2_coeff_16x8_r1_1);
+            i2_coeff_16x8_sl_r1_0 = vshlq_n_s16(i2_coeff_16x8_r1_0, 1);
+            i2_coeff_16x8_sl_r1_1 = vshlq_n_s16(i2_coeff_16x8_r1_1, 1);
+
+            result_16x8_r0_0 = vaddq_s16(i2_coeff_16x8_sl_r0_0, i2_coeff_add_16x8_r0);
+            result_16x8_r0_1 = vaddq_s16(i2_coeff_16x8_sl_r0_1, i2_coeff_add_16x8_r0);
+
+            result_16x8_r1_0 = vaddq_s16(i2_coeff_16x8_sl_r1_0, i2_coeff_add_16x8_r1);
+            result_16x8_r1_1 = vaddq_s16(i2_coeff_16x8_sl_r1_1, i2_coeff_add_16x8_r1);
+
+            final_result_16x8x2_r0 = vzipq_s16(result_16x8_r0_0, result_16x8_r0_1);
+            final_result_16x8x2_r1 = vzipq_s16(result_16x8_r1_0, result_16x8_r1_1);
+
+            vst1q_s32(pi4_ref_array + 1, vmovl_s16(vget_low_s16(final_result_16x8x2_r0.val[0])));
+            vst1q_s32(pi4_ref_array + 5, vmovl_s16(vget_high_s16(final_result_16x8x2_r0.val[0])));
+            vst1q_s32(pi4_ref_array + 9, vmovl_s16(vget_low_s16(final_result_16x8x2_r0.val[1])));
+            vst1q_s32(pi4_ref_array + 13, vmovl_s16(vget_high_s16(final_result_16x8x2_r0.val[1])));
+
+            pi4_ref_array[0] = pi2_ref_data_byte[0] * 4;
+            pi4_ref_array[15] = pi2_ref_data_byte[7] * 4;
+            pi4_ref_array += 16;
+            pi2_ref_data_byte += i4_inp_data_stride;
+
+            vst1q_s32(pi4_ref_array + 1, vmovl_s16(vget_low_s16(final_result_16x8x2_r1.val[0])));
+            vst1q_s32(pi4_ref_array + 5, vmovl_s16(vget_high_s16(final_result_16x8x2_r1.val[0])));
+            vst1q_s32(pi4_ref_array + 9, vmovl_s16(vget_low_s16(final_result_16x8x2_r1.val[1])));
+            vst1q_s32(pi4_ref_array + 13, vmovl_s16(vget_high_s16(final_result_16x8x2_r1.val[1])));
+
+            pi4_ref_array[0] = pi2_ref_data_byte[0] * 4;
+            pi4_ref_array[15] = pi2_ref_data_byte[7] * 4;
+            pi4_ref_array += 16;
+            /* vertical loop updates */
+            pi2_ref_data_byte = pi2_inp_data + ((i4_i + 2) * i4_inp_data_stride);
+        }
+
+        /* Vertical pass: 8 scratch rows -> 16 output rows of 16 WORD16 */
+        pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
+        {
+            WORD32 *pi4_ref_array_temp;
+            WORD16 *pi2_out;
+            int32x4_t i4_horz_samp_32x4_r1_1, i4_horz_samp_32x4_r1_2, i4_horz_samp_32x4_r1_3,
+                i4_horz_samp_32x4_r1_4;
+            int32x4_t i4_horz_samp_32x4_r2_1, i4_horz_samp_32x4_r2_2, i4_horz_samp_32x4_r2_3,
+                i4_horz_samp_32x4_r2_4;
+
+            int32x4_t i4_horz_res_32x4_r1_1, i4_horz_res_32x4_r1_2, i4_horz_res_32x4_r1_3,
+                i4_horz_res_32x4_r1_4;
+            int32x4_t i4_horz_res_32x4_r2_1, i4_horz_res_32x4_r2_2, i4_horz_res_32x4_r2_3,
+                i4_horz_res_32x4_r2_4;
+            int32x4_t i4_horz_res_32x4_r3_1, i4_horz_res_32x4_r3_2, i4_horz_res_32x4_r3_3,
+                i4_horz_res_32x4_r3_4;
+            int32x4_t horz_add_32x4_r2_1, horz_add_32x4_r2_2, horz_add_32x4_r2_3,
+                horz_add_32x4_r2_4;
+
+            int16x8_t comb_horz_16x8_1, comb_horz_16x8_2, comb_horz_16x8_3, comb_horz_16x8_4;
+            pi4_ref_array_temp = pi4_ref_array;
+            pi2_out = pi2_out_res;
+
+            i4_horz_samp_32x4_r1_1 = vld1q_s32(pi4_ref_array_temp);
+            i4_horz_samp_32x4_r1_2 = vld1q_s32(pi4_ref_array_temp + 4);
+            i4_horz_samp_32x4_r1_3 = vld1q_s32(pi4_ref_array_temp + 8);
+            i4_horz_samp_32x4_r1_4 = vld1q_s32(pi4_ref_array_temp + 12);
+
+            /* populate the first inter sample */
+            i4_horz_res_32x4_r1_1 = vrshrq_n_s32(i4_horz_samp_32x4_r1_1, 2);
+            i4_horz_res_32x4_r1_2 = vrshrq_n_s32(i4_horz_samp_32x4_r1_2, 2);
+            i4_horz_res_32x4_r1_3 = vrshrq_n_s32(i4_horz_samp_32x4_r1_3, 2);
+            i4_horz_res_32x4_r1_4 = vrshrq_n_s32(i4_horz_samp_32x4_r1_4, 2);
+
+            comb_horz_16x8_1 =
+                vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_1), vmovn_s32(i4_horz_res_32x4_r1_2));
+            comb_horz_16x8_2 =
+                vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_3), vmovn_s32(i4_horz_res_32x4_r1_4));
+            vst1q_s16(pi2_out, comb_horz_16x8_1);
+            vst1q_s16(pi2_out + 8, comb_horz_16x8_2);
+
+            pi2_out += i4_out_res_stride;
+
+            for(i4_j = 0; i4_j < 14; i4_j += 2)
+            {
+                pi4_ref_array_temp += MB_SIZE;
+                i4_horz_samp_32x4_r2_1 = vld1q_s32(pi4_ref_array_temp);
+                i4_horz_samp_32x4_r2_2 = vld1q_s32(pi4_ref_array_temp + 4);
+                i4_horz_samp_32x4_r2_3 = vld1q_s32(pi4_ref_array_temp + 8);
+                i4_horz_samp_32x4_r2_4 = vld1q_s32(pi4_ref_array_temp + 12);
+
+                horz_add_32x4_r2_1 = vaddq_s32(i4_horz_samp_32x4_r1_1, i4_horz_samp_32x4_r2_1);
+                horz_add_32x4_r2_2 = vaddq_s32(i4_horz_samp_32x4_r1_2, i4_horz_samp_32x4_r2_2);
+                horz_add_32x4_r2_3 = vaddq_s32(i4_horz_samp_32x4_r1_3, i4_horz_samp_32x4_r2_3);
+                horz_add_32x4_r2_4 = vaddq_s32(i4_horz_samp_32x4_r1_4, i4_horz_samp_32x4_r2_4);
+
+                i4_horz_res_32x4_r2_1 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_1, 1), horz_add_32x4_r2_1);
+                i4_horz_res_32x4_r2_2 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_2, 1), horz_add_32x4_r2_2);
+                i4_horz_res_32x4_r2_3 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_3, 1), horz_add_32x4_r2_3);
+                i4_horz_res_32x4_r2_4 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_4, 1), horz_add_32x4_r2_4);
+
+                i4_horz_res_32x4_r3_1 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_1, 1), horz_add_32x4_r2_1);
+                i4_horz_res_32x4_r3_2 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_2, 1), horz_add_32x4_r2_2);
+                i4_horz_res_32x4_r3_3 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_3, 1), horz_add_32x4_r2_3);
+                i4_horz_res_32x4_r3_4 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_4, 1), horz_add_32x4_r2_4);
+
+                i4_horz_res_32x4_r2_1 = vrshrq_n_s32(i4_horz_res_32x4_r2_1, 4);
+                i4_horz_res_32x4_r2_2 = vrshrq_n_s32(i4_horz_res_32x4_r2_2, 4);
+                i4_horz_res_32x4_r2_3 = vrshrq_n_s32(i4_horz_res_32x4_r2_3, 4);
+                i4_horz_res_32x4_r2_4 = vrshrq_n_s32(i4_horz_res_32x4_r2_4, 4);
+
+                i4_horz_res_32x4_r3_1 = vrshrq_n_s32(i4_horz_res_32x4_r3_1, 4);
+                i4_horz_res_32x4_r3_2 = vrshrq_n_s32(i4_horz_res_32x4_r3_2, 4);
+                i4_horz_res_32x4_r3_3 = vrshrq_n_s32(i4_horz_res_32x4_r3_3, 4);
+                i4_horz_res_32x4_r3_4 = vrshrq_n_s32(i4_horz_res_32x4_r3_4, 4);
+
+                comb_horz_16x8_1 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r2_1),
+                                                vmovn_s32(i4_horz_res_32x4_r2_2));
+                comb_horz_16x8_2 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r2_3),
+                                                vmovn_s32(i4_horz_res_32x4_r2_4));
+
+                comb_horz_16x8_3 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r3_1),
+                                                vmovn_s32(i4_horz_res_32x4_r3_2));
+                comb_horz_16x8_4 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r3_3),
+                                                vmovn_s32(i4_horz_res_32x4_r3_4));
+
+                /* populate 2 samples based on current coeffs */
+                vst1q_s16(pi2_out, comb_horz_16x8_1);
+                vst1q_s16(pi2_out + 8, comb_horz_16x8_2);
+                pi2_out += i4_out_res_stride;
+
+                vst1q_s16(pi2_out, comb_horz_16x8_3);
+                vst1q_s16(pi2_out + 8, comb_horz_16x8_4);
+                pi2_out += i4_out_res_stride;
+
+                /* store the coeff 2 to coeff 1 */
+                /* (used in next iteration) */
+                i4_horz_samp_32x4_r1_1 = i4_horz_samp_32x4_r2_1;
+                i4_horz_samp_32x4_r1_2 = i4_horz_samp_32x4_r2_2;
+                i4_horz_samp_32x4_r1_3 = i4_horz_samp_32x4_r2_3;
+                i4_horz_samp_32x4_r1_4 = i4_horz_samp_32x4_r2_4;
+            }
+
+            /* populate the last inter sample */
+            i4_horz_res_32x4_r1_1 = vrshrq_n_s32(i4_horz_samp_32x4_r1_1, 2);
+            i4_horz_res_32x4_r1_2 = vrshrq_n_s32(i4_horz_samp_32x4_r1_2, 2);
+            i4_horz_res_32x4_r1_3 = vrshrq_n_s32(i4_horz_samp_32x4_r1_3, 2);
+            i4_horz_res_32x4_r1_4 = vrshrq_n_s32(i4_horz_samp_32x4_r1_4, 2);
+
+            comb_horz_16x8_1 =
+                vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_1), vmovn_s32(i4_horz_res_32x4_r1_2));
+            comb_horz_16x8_2 =
+                vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_3), vmovn_s32(i4_horz_res_32x4_r1_4));
+            vst1q_s16(pi2_out, comb_horz_16x8_1);
+            vst1q_s16(pi2_out + 8, comb_horz_16x8_2);
+
+            /* leftover pointer updates; neither pointer is read after this */
+            pi4_ref_array++;
+            pi2_out_res++;
+        }
+    }
+    else
+    {
+        /* ----------------------------------------------------------------- */
+        /* LOOP over number of blocks */
+        /* ----------------------------------------------------------------- */
+        for(i4_blk_ctr = 0; i4_blk_ctr < 4; i4_blk_ctr++)
+        {
+            /* if reference layer is not coded then no processing */
+            if(0 != (u4_ref_nnz & 0x1))
+            {
+                int16x8_t i2_coeff1_16x8_r0_0, i2_coeff1_16x8_r0_1;
+                int16x8_t i2_coeff1_16x8_r1_0, i2_coeff1_16x8_r1_1;
+                int16x8_t i2_coeff1_16x8_r2_0, i2_coeff1_16x8_r2_1;
+                int16x8_t i2_coeff1_16x8_r3_0, i2_coeff1_16x8_r3_1;
+                int16x8_t i2_add_16x8_r0_0;
+                int16x8_t i2_add_16x8_r1_0;
+                int16x8_t i2_add_16x8_r2_0;
+                int16x8_t i2_add_16x8_r3_0;
+                int16x8_t i2_res_16x8_r0_0, i2_res_16x8_r0_1;
+                int16x8_t i2_res_16x8_r1_0, i2_res_16x8_r1_1;
+                int16x8_t i2_res_16x8_r2_0, i2_res_16x8_r2_1;
+                int16x8_t i2_res_16x8_r3_0, i2_res_16x8_r3_1;
+                int16x4_t i4_horz_samp_16x4_r0_1, i4_horz_samp_16x4_r0_2;
+                int16x4_t i4_horz_samp_16x4_r1_1, i4_horz_samp_16x4_r1_2;
+                int16x4_t i4_horz_samp_16x4_r2_1, i4_horz_samp_16x4_r2_2;
+                int16x4_t i4_horz_samp_16x4_r3_1, i4_horz_samp_16x4_r3_2;
+                int32x4_t i4_horz_samp_32x4_r0_1, i4_horz_samp_32x4_r0_2;
+                int32x4_t i4_horz_samp_32x4_r1_1, i4_horz_samp_32x4_r1_2;
+                int32x4_t i4_horz_samp_32x4_r2_1, i4_horz_samp_32x4_r2_2;
+                int32x4_t i4_horz_samp_32x4_r3_1, i4_horz_samp_32x4_r3_2;
+                int32x4_t i4_horz_add_32x4_r1_1, i4_horz_add_32x4_r1_2;
+                int32x4_t i4_horz_add_32x4_r2_1, i4_horz_add_32x4_r2_2;
+                int32x4_t i4_horz_add_32x4_r3_1, i4_horz_add_32x4_r3_2;
+                int16x4_t i4_horz_res_16x4_r0_1, i4_horz_res_16x4_r0_2;
+                int16x4_t i4_horz_res_16x4_r1_1, i4_horz_res_16x4_r1_2;
+                int16x4_t i4_horz_res_16x4_r2_1, i4_horz_res_16x4_r2_2;
+                int16x4_t i4_horz_res_16x4_r3_1, i4_horz_res_16x4_r3_2;
+                int16x4_t i4_horz_res_16x4_r4_1, i4_horz_res_16x4_r4_2;
+                int16x4_t i4_horz_res_16x4_r5_1, i4_horz_res_16x4_r5_2;
+                int16x4_t i4_horz_res_16x4_r6_1, i4_horz_res_16x4_r6_2;
+                int16x4_t i4_horz_res_16x4_r7_1, i4_horz_res_16x4_r7_2;
+                int32x4_t i4_horz_res_32x4_r1_1, i4_horz_res_32x4_r1_2;
+                int32x4_t i4_horz_res_32x4_r2_1, i4_horz_res_32x4_r2_2;
+                int32x4_t i4_horz_res_32x4_r3_1, i4_horz_res_32x4_r3_2;
+                int32x4_t i4_horz_res_32x4_r4_1, i4_horz_res_32x4_r4_2;
+                int32x4_t i4_horz_res_32x4_r5_1, i4_horz_res_32x4_r5_2;
+                int32x4_t i4_horz_res_32x4_r6_1, i4_horz_res_32x4_r6_2;
+                int16x8x2_t ti2_res_16x8x2_r0, ti2_res_16x8x2_r1;
+                int16x8x2_t ti2_res_16x8x2_r2, ti2_res_16x8x2_r3;
+
+                i2_coeff1_16x8_r0_0 = vld1q_s16(pi2_inp_data);
+                i2_coeff1_16x8_r1_0 = vld1q_s16(pi2_inp_data + i4_inp_data_stride);
+                i2_coeff1_16x8_r2_0 = vld1q_s16(pi2_inp_data + (i4_inp_data_stride << 1));
+                i2_coeff1_16x8_r3_0 =
+                    vld1q_s16(pi2_inp_data + (i4_inp_data_stride << 1) + i4_inp_data_stride);
+
+                i2_coeff1_16x8_r0_1 = vextq_s16(i2_coeff1_16x8_r0_0, i2_coeff1_16x8_r0_0, 1);
+                i2_coeff1_16x8_r1_1 = vextq_s16(i2_coeff1_16x8_r1_0, i2_coeff1_16x8_r1_0, 1);
+                i2_coeff1_16x8_r2_1 = vextq_s16(i2_coeff1_16x8_r2_0, i2_coeff1_16x8_r2_0, 1);
+                i2_coeff1_16x8_r3_1 = vextq_s16(i2_coeff1_16x8_r3_0, i2_coeff1_16x8_r3_0, 1);
+
+                i2_add_16x8_r0_0 = vaddq_s16(i2_coeff1_16x8_r0_1, i2_coeff1_16x8_r0_0);
+                i2_add_16x8_r1_0 = vaddq_s16(i2_coeff1_16x8_r1_1, i2_coeff1_16x8_r1_0);
+                i2_add_16x8_r2_0 = vaddq_s16(i2_coeff1_16x8_r2_1, i2_coeff1_16x8_r2_0);
+                i2_add_16x8_r3_0 = vaddq_s16(i2_coeff1_16x8_r3_1, i2_coeff1_16x8_r3_0);
+
+                i2_coeff1_16x8_r0_0 = vshlq_n_s16(i2_coeff1_16x8_r0_0, 1);
+                i2_coeff1_16x8_r1_0 = vshlq_n_s16(i2_coeff1_16x8_r1_0, 1);
+                i2_coeff1_16x8_r2_0 = vshlq_n_s16(i2_coeff1_16x8_r2_0, 1);
+                i2_coeff1_16x8_r3_0 = vshlq_n_s16(i2_coeff1_16x8_r3_0, 1);
+
+                i2_coeff1_16x8_r0_1 = vshlq_n_s16(i2_coeff1_16x8_r0_1, 1);
+                i2_coeff1_16x8_r1_1 = vshlq_n_s16(i2_coeff1_16x8_r1_1, 1);
+                i2_coeff1_16x8_r2_1 = vshlq_n_s16(i2_coeff1_16x8_r2_1, 1);
+                i2_coeff1_16x8_r3_1 = vshlq_n_s16(i2_coeff1_16x8_r3_1, 1);
+
+                i2_res_16x8_r0_0 = vaddq_s16(i2_coeff1_16x8_r0_0, i2_add_16x8_r0_0);
+                i2_res_16x8_r1_0 = vaddq_s16(i2_coeff1_16x8_r1_0, i2_add_16x8_r1_0);
+                i2_res_16x8_r2_0 = vaddq_s16(i2_coeff1_16x8_r2_0, i2_add_16x8_r2_0);
+                i2_res_16x8_r3_0 = vaddq_s16(i2_coeff1_16x8_r3_0, i2_add_16x8_r3_0);
+
+                i2_res_16x8_r0_1 = vaddq_s16(i2_coeff1_16x8_r0_1, i2_add_16x8_r0_0);
+                i2_res_16x8_r1_1 = vaddq_s16(i2_coeff1_16x8_r1_1, i2_add_16x8_r1_0);
+                i2_res_16x8_r2_1 = vaddq_s16(i2_coeff1_16x8_r2_1, i2_add_16x8_r2_0);
+                i2_res_16x8_r3_1 = vaddq_s16(i2_coeff1_16x8_r3_1, i2_add_16x8_r3_0);
+
+                ti2_res_16x8x2_r0 = vzipq_s16(i2_res_16x8_r0_0, i2_res_16x8_r0_1);
+                ti2_res_16x8x2_r1 = vzipq_s16(i2_res_16x8_r1_0, i2_res_16x8_r1_1);
+                ti2_res_16x8x2_r2 = vzipq_s16(i2_res_16x8_r2_0, i2_res_16x8_r2_1);
+                ti2_res_16x8x2_r3 = vzipq_s16(i2_res_16x8_r3_0, i2_res_16x8_r3_1);
+
+                i2_coeff1_16x8_r0_0 = vshlq_n_s16(i2_coeff1_16x8_r0_0, 1);
+                i2_coeff1_16x8_r1_0 = vshlq_n_s16(i2_coeff1_16x8_r1_0, 1);
+                i2_coeff1_16x8_r2_0 = vshlq_n_s16(i2_coeff1_16x8_r2_0, 1);
+                i2_coeff1_16x8_r3_0 = vshlq_n_s16(i2_coeff1_16x8_r3_0, 1);
+
+                vst1q_s16(pi2_refarray_buffer + 1, ti2_res_16x8x2_r0.val[0]);
+                vst1q_lane_s16(pi2_refarray_buffer, i2_coeff1_16x8_r0_0, 0);
+                vst1q_lane_s16(pi2_refarray_buffer + 7, i2_coeff1_16x8_r0_0, 3);
+
+                vst1q_s16(pi2_refarray_buffer + 9, ti2_res_16x8x2_r1.val[0]);
+                vst1q_lane_s16(pi2_refarray_buffer + 8, i2_coeff1_16x8_r1_0, 0);
+                vst1q_lane_s16(pi2_refarray_buffer + 15, i2_coeff1_16x8_r1_0, 3);
+
+                vst1q_s16(pi2_refarray_buffer + 17, ti2_res_16x8x2_r2.val[0]);
+                vst1q_lane_s16(pi2_refarray_buffer + 16, i2_coeff1_16x8_r2_0, 0);
+                vst1q_lane_s16(pi2_refarray_buffer + 23, i2_coeff1_16x8_r2_0, 3);
+
+                vst1q_s16(pi2_refarray_buffer + 25, ti2_res_16x8x2_r3.val[0]);
+                vst1q_lane_s16(pi2_refarray_buffer + 24, i2_coeff1_16x8_r3_0, 0);
+                vst1q_lane_s16(pi2_refarray_buffer + 31, i2_coeff1_16x8_r3_0, 3);
+
+                i4_horz_samp_16x4_r0_1 = vld1_s16(pi2_refarray_buffer);
+                i4_horz_samp_16x4_r0_2 = vld1_s16(pi2_refarray_buffer + 4);
+
+                i4_horz_samp_16x4_r1_1 = vld1_s16(pi2_refarray_buffer + 8);
+                i4_horz_samp_16x4_r1_2 = vld1_s16(pi2_refarray_buffer + 12);
+
+                i4_horz_samp_16x4_r2_1 = vld1_s16(pi2_refarray_buffer + 16);
+                i4_horz_samp_16x4_r2_2 = vld1_s16(pi2_refarray_buffer + 20);
+
+                i4_horz_samp_16x4_r3_1 = vld1_s16(pi2_refarray_buffer + 24);
+                i4_horz_samp_16x4_r3_2 = vld1_s16(pi2_refarray_buffer + 28);
+
+                i4_horz_res_16x4_r0_1 = vrshr_n_s16(i4_horz_samp_16x4_r0_1, 2);
+                i4_horz_res_16x4_r0_2 = vrshr_n_s16(i4_horz_samp_16x4_r0_2, 2);
+
+                i4_horz_add_32x4_r1_1 = vaddl_s16(i4_horz_samp_16x4_r0_1, i4_horz_samp_16x4_r1_1);
+                i4_horz_add_32x4_r1_2 = vaddl_s16(i4_horz_samp_16x4_r0_2, i4_horz_samp_16x4_r1_2);
+
+                i4_horz_add_32x4_r2_1 = vaddl_s16(i4_horz_samp_16x4_r1_1, i4_horz_samp_16x4_r2_1);
+                i4_horz_add_32x4_r2_2 = vaddl_s16(i4_horz_samp_16x4_r1_2, i4_horz_samp_16x4_r2_2);
+
+                i4_horz_add_32x4_r3_1 = vaddl_s16(i4_horz_samp_16x4_r2_1, i4_horz_samp_16x4_r3_1);
+                i4_horz_add_32x4_r3_2 = vaddl_s16(i4_horz_samp_16x4_r2_2, i4_horz_samp_16x4_r3_2);
+
+                i4_horz_samp_32x4_r0_1 = vshll_n_s16(i4_horz_samp_16x4_r0_1, 1);
+                i4_horz_samp_32x4_r0_2 = vshll_n_s16(i4_horz_samp_16x4_r0_2, 1);
+
+                i4_horz_samp_32x4_r1_1 = vshll_n_s16(i4_horz_samp_16x4_r1_1, 1);
+                i4_horz_samp_32x4_r1_2 = vshll_n_s16(i4_horz_samp_16x4_r1_2, 1);
+
+                i4_horz_samp_32x4_r2_1 = vshll_n_s16(i4_horz_samp_16x4_r2_1, 1);
+                i4_horz_samp_32x4_r2_2 = vshll_n_s16(i4_horz_samp_16x4_r2_2, 1);
+
+                i4_horz_samp_32x4_r3_1 = vshll_n_s16(i4_horz_samp_16x4_r3_1, 1);
+                i4_horz_samp_32x4_r3_2 = vshll_n_s16(i4_horz_samp_16x4_r3_2, 1);
+
+                i4_horz_res_32x4_r1_1 = vaddq_s32(i4_horz_samp_32x4_r0_1, i4_horz_add_32x4_r1_1);
+                i4_horz_res_32x4_r1_2 = vaddq_s32(i4_horz_samp_32x4_r0_2, i4_horz_add_32x4_r1_2);
+
+                i4_horz_res_32x4_r2_1 = vaddq_s32(i4_horz_samp_32x4_r1_1, i4_horz_add_32x4_r1_1);
+                i4_horz_res_32x4_r2_2 = vaddq_s32(i4_horz_samp_32x4_r1_2, i4_horz_add_32x4_r1_2);
+
+                i4_horz_res_32x4_r3_1 = vaddq_s32(i4_horz_samp_32x4_r1_1, i4_horz_add_32x4_r2_1);
+                i4_horz_res_32x4_r3_2 = vaddq_s32(i4_horz_samp_32x4_r1_2, i4_horz_add_32x4_r2_2);
+
+                i4_horz_res_32x4_r4_1 = vaddq_s32(i4_horz_samp_32x4_r2_1, i4_horz_add_32x4_r2_1);
+                i4_horz_res_32x4_r4_2 = vaddq_s32(i4_horz_samp_32x4_r2_2, i4_horz_add_32x4_r2_2);
+
+                i4_horz_res_32x4_r5_1 = vaddq_s32(i4_horz_samp_32x4_r2_1, i4_horz_add_32x4_r3_1);
+                i4_horz_res_32x4_r5_2 = vaddq_s32(i4_horz_samp_32x4_r2_2, i4_horz_add_32x4_r3_2);
+
+                i4_horz_res_32x4_r6_1 = vaddq_s32(i4_horz_samp_32x4_r3_1, i4_horz_add_32x4_r3_1);
+                i4_horz_res_32x4_r6_2 = vaddq_s32(i4_horz_samp_32x4_r3_2, i4_horz_add_32x4_r3_2);
+
+                i4_horz_res_16x4_r1_1 = vqrshrn_n_s32(i4_horz_res_32x4_r1_1, 4);
+                i4_horz_res_16x4_r1_2 = vqrshrn_n_s32(i4_horz_res_32x4_r1_2, 4);
+
+                i4_horz_res_16x4_r2_1 = vqrshrn_n_s32(i4_horz_res_32x4_r2_1, 4);
+                i4_horz_res_16x4_r2_2 = vqrshrn_n_s32(i4_horz_res_32x4_r2_2, 4);
+
+                i4_horz_res_16x4_r3_1 = vqrshrn_n_s32(i4_horz_res_32x4_r3_1, 4);
+                i4_horz_res_16x4_r3_2 = vqrshrn_n_s32(i4_horz_res_32x4_r3_2, 4);
+
+                i4_horz_res_16x4_r4_1 = vqrshrn_n_s32(i4_horz_res_32x4_r4_1, 4);
+                i4_horz_res_16x4_r4_2 = vqrshrn_n_s32(i4_horz_res_32x4_r4_2, 4);
+
+                i4_horz_res_16x4_r5_1 = vqrshrn_n_s32(i4_horz_res_32x4_r5_1, 4);
+                i4_horz_res_16x4_r5_2 = vqrshrn_n_s32(i4_horz_res_32x4_r5_2, 4);
+
+                i4_horz_res_16x4_r6_1 = vqrshrn_n_s32(i4_horz_res_32x4_r6_1, 4);
+                i4_horz_res_16x4_r6_2 = vqrshrn_n_s32(i4_horz_res_32x4_r6_2, 4);
+
+                i4_horz_res_16x4_r7_1 = vrshr_n_s16(i4_horz_samp_16x4_r3_1, 2);
+                i4_horz_res_16x4_r7_2 = vrshr_n_s16(i4_horz_samp_16x4_r3_2, 2);
+
+                vst1_s16(pi2_out_res, i4_horz_res_16x4_r0_1);
+                vst1_s16(pi2_out_res + 4, i4_horz_res_16x4_r0_2);
+
+                vst1_s16(pi2_out_res + i4_out_res_stride, i4_horz_res_16x4_r1_1);
+                vst1_s16(pi2_out_res + i4_out_res_stride + 4, i4_horz_res_16x4_r1_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride << 1), i4_horz_res_16x4_r2_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride << 1) + 4, i4_horz_res_16x4_r2_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 3), i4_horz_res_16x4_r3_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 3) + 4, i4_horz_res_16x4_r3_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride << 2), i4_horz_res_16x4_r4_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride << 2) + 4, i4_horz_res_16x4_r4_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 5), i4_horz_res_16x4_r5_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 5) + 4, i4_horz_res_16x4_r5_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 6), i4_horz_res_16x4_r6_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 6) + 4, i4_horz_res_16x4_r6_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 7), i4_horz_res_16x4_r7_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 7) + 4, i4_horz_res_16x4_r7_2);
+
+                pi2_out_res += BLK8x8SIZE;
+            }
+            else
+            {
+                pi2_out_res += BLK8x8SIZE;
+            }
+
+            /* Block level loop updates */
+            if(1 == i4_blk_ctr)
+            {
+                pi2_inp_data -= SUB_BLK_WIDTH_4x4;
+                pi2_inp_data += (i4_inp_data_stride * SUB_BLK_HEIGHT_4x4);
+                pi2_out_res -= MB_SIZE;
+                pi2_out_res += (i4_out_res_stride * BLK8x8SIZE);
+                u4_ref_nnz >>= 2;
+            }
+            else
+            {
+                pi2_inp_data += SUB_BLK_HEIGHT_4x4;
+            }
+            u4_ref_nnz >>= 1;
+        }
+        /* The above loop iterates over all the blocks */
+    }
+}
+
+UWORD32 isvce_get_sad_with_residual_pred_neon(buffer_container_t *ps_src,
+                                              buffer_container_t *ps_pred,
+                                              buffer_container_t *ps_res, UWORD32 u4_mb_wd,
+                                              UWORD32 u4_mb_ht)
+{
+    UWORD32 i, j, u4_sad = 0;
+    UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
+    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
+    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
+    WORD32 i4_src_stride = ps_src->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_res_stride = ps_res->i4_data_stride;
+    UWORD32 u4_num_rows_per_loop = 8;
+    UWORD32 u4_ht_by_8 = u4_mb_ht / u4_num_rows_per_loop;
+    uint8x8_t src0, src1, src2, src3;
+    uint8x8_t src4, src5, src6, src7;
+    uint8x8_t pred0, pred1, pred2, pred3;
+    uint8x8_t pred4, pred5, pred6, pred7;
+    int16x8_t res0_16x8, res1_16x8, res2_16x8, res3_16x8, res4_16x8, res5_16x8, res6_16x8,
+        res7_16x8;
+    uint16x8_t res0_u16x8, res1_u16x8, res2_u16x8, res3_u16x8, res4_u16x8, res5_u16x8, res6_u16x8,
+        res7_u16x8;
+    int16x8_t respred0_16x8, respred1_16x8, respred2_16x8, respred3_16x8, respred4_16x8,
+        respred5_16x8, respred6_16x8, respred7_16x8;
+    int16x8_t temp0_16x8, temp1_16x8, temp2_16x8, temp3_16x8, temp4_16x8, temp5_16x8, temp6_16x8,
+        temp7_16x8;
+    int32x4_t temp0_32x4;
+    int32x2_t temp0_32x2;
+
+    if((u4_mb_wd == 16) && (u4_mb_ht % 8 == 0))
+    {
+        for(i = 0; i < u4_ht_by_8; i++)
+        {
+            /* This loop processes 4 rows of 16 bytes each iteration */
+            /* So, 8 rows are processed across two iterations */
+            for(j = 0; j < 2; j++)
+            {
+                src0 = vld1_u8(pu1_src);
+                src1 = vld1_u8(pu1_src + 8);
+
+                pu1_src += i4_src_stride;
+
+                src2 = vld1_u8(pu1_src);
+                src3 = vld1_u8(pu1_src + 8);
+
+                pu1_src += i4_src_stride;
+
+                src4 = vld1_u8(pu1_src);
+                src5 = vld1_u8(pu1_src + 8);
+
+                pu1_src += i4_src_stride;
+
+                src6 = vld1_u8(pu1_src);
+                src7 = vld1_u8(pu1_src + 8);
+
+                pu1_src += i4_src_stride;
+
+                pred0 = vld1_u8(pu1_pred);
+                pred1 = vld1_u8(pu1_pred + 8);
+
+                pu1_pred += i4_pred_stride;
+
+                pred2 = vld1_u8(pu1_pred);
+                pred3 = vld1_u8(pu1_pred + 8);
+
+                pu1_pred += i4_pred_stride;
+
+                pred4 = vld1_u8(pu1_pred);
+                pred5 = vld1_u8(pu1_pred + 8);
+
+                pu1_pred += i4_pred_stride;
+
+                pred6 = vld1_u8(pu1_pred);
+                pred7 = vld1_u8(pu1_pred + 8);
+
+                pu1_pred += i4_pred_stride;
+
+                res0_u16x8 = vsubl_u8(src0, pred0);
+                res1_u16x8 = vsubl_u8(src1, pred1);
+                res2_u16x8 = vsubl_u8(src2, pred2);
+                res3_u16x8 = vsubl_u8(src3, pred3);
+                res4_u16x8 = vsubl_u8(src4, pred4);
+                res5_u16x8 = vsubl_u8(src5, pred5);
+                res6_u16x8 = vsubl_u8(src6, pred6);
+                res7_u16x8 = vsubl_u8(src7, pred7);
+
+                res0_16x8 = vreinterpretq_s16_u16(res0_u16x8);
+                res1_16x8 = vreinterpretq_s16_u16(res1_u16x8);
+                res2_16x8 = vreinterpretq_s16_u16(res2_u16x8);
+                res3_16x8 = vreinterpretq_s16_u16(res3_u16x8);
+                res4_16x8 = vreinterpretq_s16_u16(res4_u16x8);
+                res5_16x8 = vreinterpretq_s16_u16(res5_u16x8);
+                res6_16x8 = vreinterpretq_s16_u16(res6_u16x8);
+                res7_16x8 = vreinterpretq_s16_u16(res7_u16x8);
+
+                respred0_16x8 = vld1q_s16(pi2_res);
+                respred1_16x8 = vld1q_s16(pi2_res + 8);
+
+                pi2_res += i4_res_stride;
+
+                respred2_16x8 = vld1q_s16(pi2_res);
+                respred3_16x8 = vld1q_s16(pi2_res + 8);
+
+                pi2_res += i4_res_stride;
+
+                respred4_16x8 = vld1q_s16(pi2_res);
+                respred5_16x8 = vld1q_s16(pi2_res + 8);
+
+                pi2_res += i4_res_stride;
+
+                respred6_16x8 = vld1q_s16(pi2_res);
+                respred7_16x8 = vld1q_s16(pi2_res + 8);
+
+                pi2_res += i4_res_stride;
+
+                temp0_16x8 = vsubq_s16(res0_16x8, respred0_16x8);
+                temp1_16x8 = vsubq_s16(res1_16x8, respred1_16x8);
+                temp2_16x8 = vsubq_s16(res2_16x8, respred2_16x8);
+                temp3_16x8 = vsubq_s16(res3_16x8, respred3_16x8);
+                temp4_16x8 = vsubq_s16(res4_16x8, respred4_16x8);
+                temp5_16x8 = vsubq_s16(res5_16x8, respred5_16x8);
+                temp6_16x8 = vsubq_s16(res6_16x8, respred6_16x8);
+                temp7_16x8 = vsubq_s16(res7_16x8, respred7_16x8);
+
+                temp0_16x8 = vabsq_s16(temp0_16x8);
+                temp1_16x8 = vabsq_s16(temp1_16x8);
+                temp2_16x8 = vabsq_s16(temp2_16x8);
+                temp3_16x8 = vabsq_s16(temp3_16x8);
+                temp4_16x8 = vabsq_s16(temp4_16x8);
+                temp5_16x8 = vabsq_s16(temp5_16x8);
+                temp6_16x8 = vabsq_s16(temp6_16x8);
+                temp7_16x8 = vabsq_s16(temp7_16x8);
+
+                temp0_16x8 = vaddq_s16(temp0_16x8, temp1_16x8);
+                temp1_16x8 = vaddq_s16(temp2_16x8, temp3_16x8);
+                temp2_16x8 = vaddq_s16(temp4_16x8, temp5_16x8);
+                temp3_16x8 = vaddq_s16(temp6_16x8, temp7_16x8);
+
+                temp0_16x8 = vaddq_s16(temp0_16x8, temp1_16x8);
+                temp1_16x8 = vaddq_s16(temp2_16x8, temp3_16x8);
+
+                temp0_16x8 = vaddq_s16(temp0_16x8, temp1_16x8);
+
+                temp0_32x4 = vpaddlq_s16(temp0_16x8);
+                temp0_32x2 = vpadd_s32(vget_low_s32(temp0_32x4), vget_high_s32(temp0_32x4));
+
+                u4_sad += vget_lane_s32(temp0_32x2, 0);
+                u4_sad += vget_lane_s32(temp0_32x2, 1);
+            }
+        }
+    }
+    else
+    {
+        for(i = 0; i < u4_mb_ht; i++)
+        {
+            for(j = 0; j < u4_mb_wd; j++)
+            {
+                WORD16 i2_src = pu1_src[j + i * i4_src_stride];
+                WORD16 i2_pred = pu1_pred[j + i * i4_pred_stride];
+                WORD16 i2_res = pi2_res[j + i * i4_res_stride];
+                u4_sad += ABS(i2_src - i2_pred - i2_res);
+            }
+        }
+    }
+
+    return u4_sad;
+}
diff --git a/encoder/irc_rate_control_api_structs.h b/encoder/irc_rate_control_api_structs.h
index ba39e7f..3248c74 100644
--- a/encoder/irc_rate_control_api_structs.h
+++ b/encoder/irc_rate_control_api_structs.h
@@ -16,7 +16,7 @@
  *
  *****************************************************************************
  * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore -*/ + */ #ifndef _RATE_CONTROL_API_STRUCTS_H_ #define _RATE_CONTROL_API_STRUCTS_H_ @@ -74,7 +74,9 @@ typedef struct rate_control_api_t UWORD8 u1_is_first_frm; - UWORD8 au1_min_max_qp[(MAX_PIC_TYPE << 1)]; + UWORD8 au1_min_max_qp[MAX_PIC_TYPE * 2]; + + UWORD8 au1_min_max_avc_qp[MAX_PIC_TYPE * 2]; WORD32 i4_prev_frm_est_bits; @@ -89,5 +91,4 @@ typedef struct rate_control_api_t } rate_control_api_t; -#endif/*_RATE_CONTROL_API_STRUCTS_H_*/ - +#endif /*_RATE_CONTROL_API_STRUCTS_H_*/ diff --git a/encoder/riscv/svc/isvce_function_selector.c b/encoder/riscv/svc/isvce_function_selector.c new file mode 100644 index 0000000..363d02d --- /dev/null +++ b/encoder/riscv/svc/isvce_function_selector.c @@ -0,0 +1,80 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in svc +* +* @author +* Ittiam +* +* @par List of Functions: isvce_init_function_ptr, isvce_default_arch +* +* @remarks +* None +* +******************************************************************************* +*/ + +#include "iv2.h" +#include "isvce_structs.h" + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context by delegating to isvce_init_function_ptr_generic() +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_init_function_ptr(isvce_codec_t *ps_codec) { isvce_init_function_ptr_generic(ps_codec); } + +/** +******************************************************************************* +* +* @brief Determine the architecture of the encoder executing environment +* +* @par Description: This routine returns the architecture of the environment +* in which the current encoder is being tested +* +* @param[in] void +* +* @returns IV_ARCH_T +* always ARCH_NA in this implementation +* +* @remarks none +* +******************************************************************************* +*/ +IV_ARCH_T isvce_default_arch(void) { return ARCH_NA; } diff --git a/encoder/riscv/svc/isvce_platform_macros.h b/encoder/riscv/svc/isvce_platform_macros.h new file mode 100644 index 0000000..3664ee4 --- /dev/null +++ b/encoder/riscv/svc/isvce_platform_macros.h @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you
may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** + ******************************************************************************* + * @file + * isvce_platform_macros.h + * + * @brief + * Contains platform specific routines used for codec context initialization + * + * @author + * ittiam + * + * @remarks + * none + * + ******************************************************************************* + */ + +#ifndef _ISVCE_PLATFORM_MACROS_H_ +#define _ISVCE_PLATFORM_MACROS_H_ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function
pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_init_function_ptr(isvce_codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Determine the architecture of the encoder executing environment +* +* @par Description: This routine returns the architecture of the environment +* in which the current encoder is being tested +* +* @param[in] void +* +* @returns IV_ARCH_T +* architecture +* +* @remarks none +* +******************************************************************************* +*/ +IV_ARCH_T isvce_default_arch(void); + +#endif /* _ISVCE_PLATFORM_MACROS_H_ */ diff --git a/encoder/svc/irc_svc_rate_control_api.c b/encoder/svc/irc_svc_rate_control_api.c new file mode 100644 index 0000000..a2e9453 --- /dev/null +++ b/encoder/svc/irc_svc_rate_control_api.c @@ -0,0 +1,116 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ + +/*****************************************************************************/ +/* Includes */ +/*****************************************************************************/ + +/* System include files */ +#include "stdio.h" + +/* User include files */ +#include "irc_datatypes.h" +#include "irc_common.h" +#include "irc_cntrl_param.h" +#include "irc_mem_req_and_acq.h" +#include "irc_rd_model.h" +#include "irc_est_sad.h" +#include "irc_fixed_point_error_bits.h" +#include "irc_vbr_storage_vbv.h" +#include "irc_picture_type.h" +#include "irc_bit_allocation.h" +#include "irc_mb_model_based.h" +#include "irc_cbr_buffer_control.h" +#include "irc_vbr_str_prms.h" +#include "irc_rate_control_api.h" +#include "irc_rate_control_api_structs.h" +#include "irc_trace_support.h" + +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +#define DEV_Q 4 /*Q format(Shift) for Deviation range factor */ +#define HI_DEV_FCTR 22 /* 1.4*16 */ +#define LO_DEV_FCTR 12 /* 0.75*16 */ +#define GET_HI_DEV_QP(Qprev) ((((WORD32) Qprev) * HI_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q) +#define GET_LO_DEV_QP(Qprev) ((((WORD32) Qprev) * LO_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q) +#define CLIP_QP(Qc, hi_d, lo_d) (((Qc) < (lo_d)) ? ((lo_d)) : (((Qc) > (hi_d)) ? 
(hi_d) : (Qc))) + +/******************************************************************************* + * Description : Gets the frame level init qp for the given picture type + * based on bits per pixel (d_bpp) and gradient per pixel (d_gpp) + ******************************************************************************/ +/* Result is clipped to the picture type's au1_min_max_avc_qp limits and rounded; returns 0 for an unsupported rc type */ +UWORD8 irc_get_frame_level_init_qp(rate_control_api_t *ps_rate_control_api, rc_type_e e_rc_type, + picture_type_e e_pic_type, DOUBLE d_bpp, DOUBLE d_gpp) +{ + DOUBLE d_frame_qp; + + UWORD8 u1_min_qp = + ps_rate_control_api->au1_min_max_avc_qp[(e_pic_type << 1)]; + UWORD8 u1_max_qp = + ps_rate_control_api->au1_min_max_avc_qp[(e_pic_type << 1) + 1]; + + if((e_rc_type != VBR_STORAGE) && (e_rc_type != VBR_STORAGE_DVD_COMP) && + (e_rc_type != CBR_NLDRC) && (e_rc_type != CONST_QP) && (e_rc_type != VBR_STREAMING)) + { + trace_printf( + (const WORD8 *) " Only VBR,NLDRC and CONST QP supported for now \n"); + return (0); + } + + if(d_bpp <= 0.18) + { + d_frame_qp = 43.49 + (0.59 * d_gpp) - (106.45 * d_bpp); + } + else if(d_bpp <= 0.6) + { + d_frame_qp = 25.12 + (0.69 * d_gpp) - (29.23 * (d_bpp - 0.18)); + } + else + { + d_frame_qp = 13.93 + (0.74 * d_gpp) - (18.4 * (d_bpp - 0.6)); + } + + /* Truncating the QP to the Max and Min Qp values possible */ + if(d_frame_qp < u1_min_qp) d_frame_qp = u1_min_qp; + if(d_frame_qp > u1_max_qp) d_frame_qp = u1_max_qp; + + return ((UWORD8) (d_frame_qp + 0.5)); +} +/* Copies all 2 * MAX_PIC_TYPE entries of both caller-supplied QP-limit arrays into the rate control state */ +void irc_change_qp_constraints(rate_control_api_t *ps_rate_control_api, UWORD8 *pu1_min_max_qp, + UWORD8 *pu1_min_max_avc_qp) +{ + WORD32 i; + + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_rate_control_api->au1_min_max_qp[(i << 1)] = pu1_min_max_qp[(i << 1)]; + ps_rate_control_api->au1_min_max_qp[(i << 1) + 1] = pu1_min_max_qp[(i << 1) + 1]; + ps_rate_control_api->au1_min_max_avc_qp[(i << 1)] = pu1_min_max_avc_qp[(i << 1)]; +
ps_rate_control_api->au1_min_max_avc_qp[(i << 1) + 1] = pu1_min_max_avc_qp[(i << 1) + 1]; + } +} +/* Returns the u1_scd_detected field of the rate control state */ +UWORD8 irc_is_scenecut(rate_control_api_t *ps_rate_control_api) +{ + return ((rate_control_api_t *) (ps_rate_control_api))->u1_scd_detected; +} diff --git a/encoder/svc/irc_svc_rate_control_api.h b/encoder/svc/irc_svc_rate_control_api.h new file mode 100644 index 0000000..7400b3d --- /dev/null +++ b/encoder/svc/irc_svc_rate_control_api.h @@ -0,0 +1,46 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ + +#ifndef _IRC_SVC_RATE_CONTROL_API_H_ +#define _IRC_SVC_RATE_CONTROL_API_H_ + +/* Dependencies of 'irc_rate_control_api_structs' */ +#include "irc_picture_type.h" +#include "irc_rd_model.h" +#include "irc_vbr_storage_vbv.h" +#include "irc_est_sad.h" +#include "irc_bit_allocation.h" +#include "irc_mb_model_based.h" +#include "irc_cbr_buffer_control.h" +#include "irc_vbr_str_prms.h" +#include "irc_common.h" + +#include "irc_rate_control_api_structs.h" + +/* Get frame level QP based on BPP and GPP */ +UWORD8 irc_get_frame_level_init_qp(rate_control_api_t *ps_rate_control_api, rc_type_e e_rc_type, + picture_type_e e_pic_type, DOUBLE d_bpp, DOUBLE d_gpp); + +void irc_change_qp_constraints(rate_control_api_t *ps_rate_control_api, UWORD8 *pu1_min_max_qp, + UWORD8 *pu1_min_max_avc_qp); + +extern UWORD8 irc_is_scenecut(rate_control_api_t *ps_rate_control_api); + +#endif diff --git a/encoder/svc/isvce.h b/encoder/svc/isvce.h new file mode 100644 index 0000000..9b914a7 --- /dev/null +++ b/encoder/svc/isvce.h @@ -0,0 +1,1023 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +/*****************************************************************************/ +/* */ +/* File Name : isvce.h */ +/* */ +/* Description : This file contains all the necessary structure and */ +/* enumeration definitions needed for the Application */ +/* Program Interface(API) of the Ittiam SVC Encoder */ +/* */ +/* List of Functions : isvce_api_function */ +/* */ +/*****************************************************************************/ + +#ifndef _ISVCE_H_ +#define _ISVCE_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include <stdbool.h> + +#include "iv2.h" +#include "ive2.h" + + /*****************************************************************************/ + /* Enums */ + /*****************************************************************************/ + typedef enum ISVCE_API_COMMAND_TYPE_T + { + ISVCE_CMD_VIDEO_NA = 0x7FFFFFFF, + ISVCE_CMD_GET_NUM_MEM_REC = 0x0, + ISVCE_CMD_FILL_NUM_MEM_REC = 0x1, + ISVCE_CMD_RETRIEVE_MEMREC = 0x2, + ISVCE_CMD_INIT = 0x3, + ISVCE_CMD_EXTENSIONS = 0x100, + ISVCE_CMD_VIDEO_CTL, + ISVCE_CMD_VIDEO_ENCODE + } ISVCE_API_COMMAND_TYPE_T; + + typedef enum ISVCE_CONTROL_API_COMMAND_TYPE_T + { + ISVCE_CMD_CT_NA = 0x7FFFFFFF, + ISVCE_CMD_CTL_SETDEFAULT = 0x0, + ISVCE_CMD_CTL_SET_DIMENSIONS = 0x1, + ISVCE_CMD_CTL_SET_FRAMERATE = 0x2, + ISVCE_CMD_CTL_SET_BITRATE = 0x3, + ISVCE_CMD_CTL_SET_FRAMETYPE = 0x4, + ISVCE_CMD_CTL_SET_QP = 0x5, + ISVCE_CMD_CTL_SET_ENC_MODE = 0x6, + ISVCE_CMD_CTL_SET_VBV_PARAMS = 0x7, + ISVCE_CMD_CTL_SET_AIR_PARAMS = 0x8, + ISVCE_CMD_CTL_SET_ME_PARAMS = 0X9, + ISVCE_CMD_CTL_SET_GOP_PARAMS = 0XA, + ISVCE_CMD_CTL_SET_PROFILE_PARAMS = 0XB, + ISVCE_CMD_CTL_SET_DEBLOCK_PARAMS = 0XC, + ISVCE_CMD_CTL_SET_IPE_PARAMS = 0XD, + ISVCE_CMD_CTL_SET_VUI_PARAMS = 0XE, + ISVCE_CMD_CTL_SET_NUM_CORES = 0x30, + ISVCE_CMD_CTL_RESET = 0xA0, + ISVCE_CMD_CTL_FLUSH = 0xB0, + ISVCE_CMD_CTL_GETBUFINFO = 0xC0, + ISVCE_CMD_CTL_GETVERSION = 0xC1, + ISVCE_CMD_CTL_SET_SEI_MDCV_PARAMS = 0xD0, + ISVCE_CMD_CTL_SET_SEI_CLL_PARAMS =
0xD1, + ISVCE_CMD_CTL_SET_SEI_AVE_PARAMS = 0xD2, + ISVCE_CMD_CTL_SET_SEI_CCV_PARAMS = 0xD3, + ISVCE_CMD_CTL_GET_ENC_FRAME_DIMENSIONS = 0xE1 + } ISVCE_CONTROL_API_COMMAND_TYPE_T; + + /*****************************************************************************/ + /* Extended Structures */ + /*****************************************************************************/ + + /*****************************************************************************/ + /* Get Number of Memory Records */ + /*****************************************************************************/ + typedef struct svc_inp_params_t + { + /** + * Num Temporal Layers + */ + UWORD8 u1_num_temporal_layers; + + /** + * Num Spatial Layers + */ + UWORD8 u1_num_spatial_layers; + + /** + * Resolution ratio between spatial layers + */ + DOUBLE d_spatial_res_ratio; + + } svc_inp_params_t; + + typedef struct isvce_num_mem_rec_ip_t + { + iv_num_mem_rec_ip_t s_ive_ip; + } isvce_num_mem_rec_ip_t; + + typedef struct isvce_num_mem_rec_op_t + { + iv_num_mem_rec_op_t s_ive_op; + } isvce_num_mem_rec_op_t; + + /*****************************************************************************/ + /* Fill Memory Records */ + /*****************************************************************************/ + + typedef struct isvce_fill_mem_rec_ip_t + { + iv_fill_mem_rec_ip_t s_ive_ip; + + svc_inp_params_t s_svc_inp_params; + + UWORD32 u4_wd; + + UWORD32 u4_ht; + + } isvce_fill_mem_rec_ip_t; + + typedef struct isvce_fill_mem_rec_op_t + { + iv_fill_mem_rec_op_t s_ive_op; + } isvce_fill_mem_rec_op_t; + + /*****************************************************************************/ + /* Retrieve Memory Records */ + /*****************************************************************************/ + + typedef struct isvce_retrieve_mem_rec_ip_t + { + iv_retrieve_mem_rec_ip_t s_ive_ip; + } isvce_retrieve_mem_rec_ip_t; + + typedef struct isvce_retrieve_mem_rec_op_t + { + iv_retrieve_mem_rec_op_t s_ive_op; + } isvce_retrieve_mem_rec_op_t;
+ + /*****************************************************************************/ + /* Initialize encoder */ + /*****************************************************************************/ + + typedef struct isvce_init_ip_t + { + ive_init_ip_t s_ive_ip; + + svc_inp_params_t s_svc_inp_params; + + UWORD32 *pu4_max_bitrate; + + UWORD32 u4_wd; + + UWORD32 u4_ht; + + bool b_use_default_vui; + + bool b_nalu_info_export_enable; + + } isvce_init_ip_t; + + typedef struct isvce_init_op_t + { + ive_init_op_t s_ive_op; + } isvce_init_op_t; + + /*****************************************************************************/ + /* Video control Flush */ + /*****************************************************************************/ + + typedef struct isvce_ctl_flush_ip_t + { + ive_ctl_flush_ip_t s_ive_ip; + } isvce_ctl_flush_ip_t; + + typedef struct isvce_ctl_flush_op_t + { + ive_ctl_flush_op_t s_ive_op; + } isvce_ctl_flush_op_t; + + /*****************************************************************************/ + /* Video control reset */ + /*****************************************************************************/ + + typedef struct isvce_ctl_reset_ip_t + { + ive_ctl_reset_ip_t s_ive_ip; + } isvce_ctl_reset_ip_t; + + typedef struct isvce_ctl_reset_op_t + { + ive_ctl_reset_op_t s_ive_op; + } isvce_ctl_reset_op_t; + + /*****************************************************************************/ + /* Video control:Get Buf Info */ + /*****************************************************************************/ + + typedef struct isvce_ctl_getbufinfo_ip_t + { + ive_ctl_getbufinfo_ip_t s_ive_ip; + } isvce_ctl_getbufinfo_ip_t; + + typedef struct isvce_ctl_getbufinfo_op_t + { + ive_ctl_getbufinfo_op_t s_ive_op; + + UWORD32 au4_min_rec_buf_size[IVE_MAX_IO_BUFFER_COMPONENTS]; + + UWORD32 u4_rec_comp_cnt; + + UWORD32 u4_min_rec_bufs; + + UWORD32 u4_min_nalu_info_bufs; + + UWORD32 u4_min_nalu_info_buf_size; + } isvce_ctl_getbufinfo_op_t; + + 
/*****************************************************************************/ + /* Video control:Get Version Info */ + /*****************************************************************************/ + + typedef struct isvce_ctl_getversioninfo_ip_t + { + ive_ctl_getversioninfo_ip_t s_ive_ip; + } isvce_ctl_getversioninfo_ip_t; + + typedef struct isvce_ctl_getversioninfo_op_t + { + ive_ctl_getversioninfo_op_t s_ive_op; + } isvce_ctl_getversioninfo_op_t; + + /*****************************************************************************/ + /* Video control:Set default params */ + /*****************************************************************************/ + + typedef struct isvce_ctl_setdefault_ip_t + { + ive_ctl_setdefault_ip_t s_ive_ip; + } isvce_ctl_setdefault_ip_t; + + typedef struct isvce_ctl_setdefault_op_t + { + ive_ctl_setdefault_op_t s_ive_op; + } isvce_ctl_setdefault_op_t; + + /*****************************************************************************/ + /* Video control Set IPE params */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_ipe_params_ip_t + { + ive_ctl_set_ipe_params_ip_t s_ive_ip; + } isvce_ctl_set_ipe_params_ip_t; + + typedef struct isvce_ctl_set_ipe_params_op_t + { + ive_ctl_set_ipe_params_op_t s_ive_op; + } isvce_ctl_set_ipe_params_op_t; + + /*****************************************************************************/ + /* Video control Set Frame dimensions */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_dimensions_ip_t + { + ive_ctl_set_dimensions_ip_t s_ive_ip; + } isvce_ctl_set_dimensions_ip_t; + + typedef struct isvce_ctl_set_dimensions_op_t + { + ive_ctl_set_dimensions_op_t s_ive_op; + } isvce_ctl_set_dimensions_op_t; + + /* Video control - Get Enc Frame dimensions */ + typedef struct isvce_ctl_get_enc_dimensions_ip_t + { + UWORD32 u4_inp_frame_wd; + + UWORD32 u4_inp_frame_ht; + } 
isvce_ctl_get_enc_dimensions_ip_t; + + typedef struct isvce_ctl_get_enc_dimensions_op_t + { + UWORD32 u4_error_code; + + UWORD32 u4_enc_frame_wd; + + UWORD32 u4_enc_frame_ht; + + } isvce_ctl_get_enc_dimensions_op_t; + + /*****************************************************************************/ + /* Video control Set Frame rates */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_frame_rate_ip_t + { + ive_ctl_set_frame_rate_ip_t s_ive_ip; + } isvce_ctl_set_frame_rate_ip_t; + + typedef struct isvce_ctl_set_frame_rate_op_t + { + ive_ctl_set_frame_rate_op_t s_ive_op; + } isvce_ctl_set_frame_rate_op_t; + + /*****************************************************************************/ + /* Video control Set Bitrate */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_bitrate_ip_t + { + ive_ctl_set_bitrate_ip_t s_ive_ip; + + UWORD32 *pu4_target_bitrate; + } isvce_ctl_set_bitrate_ip_t; + + typedef struct isvce_ctl_set_bitrate_op_t + { + ive_ctl_set_bitrate_op_t s_ive_op; + } isvce_ctl_set_bitrate_op_t; + + /*****************************************************************************/ + /* Video control Set Frame type */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_frame_type_ip_t + { + ive_ctl_set_frame_type_ip_t s_ive_ip; + } isvce_ctl_set_frame_type_ip_t; + + typedef struct isvce_ctl_set_frame_type_op_t + { + ive_ctl_set_frame_type_op_t s_ive_op; + } isvce_ctl_set_frame_type_op_t; + + /*****************************************************************************/ + /* Video control Set Encode mode */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_enc_mode_ip_t + { + ive_ctl_set_enc_mode_ip_t s_ive_ip; + } isvce_ctl_set_enc_mode_ip_t; + + typedef struct isvce_ctl_set_enc_mode_op_t + { + 
ive_ctl_set_enc_mode_op_t s_ive_op; + } isvce_ctl_set_enc_mode_op_t; + + /*****************************************************************************/ + /* Video control Set QP */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_qp_ip_t + { + ive_ctl_set_qp_ip_t s_ive_ip; + + UWORD32 *pu4_i_qp; + + UWORD32 *pu4_i_qp_max; + + UWORD32 *pu4_i_qp_min; + + UWORD32 *pu4_p_qp; + + UWORD32 *pu4_p_qp_max; + + UWORD32 *pu4_p_qp_min; + + UWORD32 *pu4_b_qp; + + UWORD32 *pu4_b_qp_max; + + UWORD32 *pu4_b_qp_min; + + } isvce_ctl_set_qp_ip_t; + + typedef struct isvce_ctl_set_qp_op_t + { + ive_ctl_set_qp_op_t s_ive_op; + } isvce_ctl_set_qp_op_t; + + /*****************************************************************************/ + /* Video control Set AIR params */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_air_params_ip_t + { + ive_ctl_set_air_params_ip_t s_ive_ip; + } isvce_ctl_set_air_params_ip_t; + + typedef struct isvce_ctl_set_air_params_op_t + { + ive_ctl_set_air_params_op_t s_ive_op; + } isvce_ctl_set_air_params_op_t; + + /*****************************************************************************/ + /* Video control Set VBV params */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_vbv_params_ip_t + { + ive_ctl_set_vbv_params_ip_t s_ive_ip; + + UWORD32 *pu4_vbv_buffer_delay; + } isvce_ctl_set_vbv_params_ip_t; + + typedef struct isvce_ctl_set_vbv_params_op_t + { + ive_ctl_set_vbv_params_op_t s_ive_op; + } isvce_ctl_set_vbv_params_op_t; + + /*****************************************************************************/ + /* Video control Set Processor Details */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_num_cores_ip_t + { + ive_ctl_set_num_cores_ip_t s_ive_ip; + } isvce_ctl_set_num_cores_ip_t; + + typedef 
struct isvce_ctl_set_num_cores_op_t + { + ive_ctl_set_num_cores_op_t s_ive_op; + } isvce_ctl_set_num_cores_op_t; + + /*****************************************************************************/ + /* Video control Set Motion estimation params */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_me_params_ip_t + { + ive_ctl_set_me_params_ip_t s_ive_ip; + } isvce_ctl_set_me_params_ip_t; + + typedef struct isvce_ctl_set_me_params_op_t + { + ive_ctl_set_me_params_op_t s_ive_op; + } isvce_ctl_set_me_params_op_t; + + /*****************************************************************************/ + /* Video control Set GOP params */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_gop_params_ip_t + { + ive_ctl_set_gop_params_ip_t s_ive_ip; + } isvce_ctl_set_gop_params_ip_t; + + typedef struct isvce_ctl_set_gop_params_op_t + { + ive_ctl_set_gop_params_op_t s_ive_op; + } isvce_ctl_set_gop_params_op_t; + + /*****************************************************************************/ + /* Video control Set Deblock params */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_deblock_params_ip_t + { + ive_ctl_set_deblock_params_ip_t s_ive_ip; + } isvce_ctl_set_deblock_params_ip_t; + + typedef struct isvce_ctl_set_deblock_params_op_t + { + ive_ctl_set_deblock_params_op_t s_ive_op; + } isvce_ctl_set_deblock_params_op_t; + + /*****************************************************************************/ + /* Video control Set Profile params */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_profile_params_ip_t + { + ive_ctl_set_profile_params_ip_t s_ive_ip; + } isvce_ctl_set_profile_params_ip_t; + + typedef struct isvce_ctl_set_profile_params_op_t + { + ive_ctl_set_profile_params_op_t s_ive_op; + } 
isvce_ctl_set_profile_params_op_t; + + /*****************************************************************************/ + /* Synchronous video encode call */ + /*****************************************************************************/ + typedef struct isvce_nalu_info_buf_t + { + /* For each NALU, following info will be copied as a csv string - */ + /* 'type,length,SId,TID,isIDR,isFirstSliceInLayer,isLastSliceInLayer' */ + UWORD8 *pu1_buf; + + UWORD32 u4_num_bytes; + + UWORD32 u4_buf_size; + } isvce_nalu_info_buf_t; + + typedef struct isvce_video_encode_ip_t + { + ive_video_encode_ip_t s_ive_ip; + + isvce_nalu_info_buf_t *ps_nalu_info_buf; + + } isvce_video_encode_ip_t; + + typedef struct isvce_video_encode_op_t + { + ive_video_encode_op_t s_ive_op; + + bool b_is_nalu_info_present; + + isvce_nalu_info_buf_t *ps_nalu_info_buf; + + } isvce_video_encode_op_t; + + /*****************************************************************************/ + /* Video usability information */ + /*****************************************************************************/ + typedef struct isvce_vui_ip_t + { + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : ISVCE_CMD_VIDEO_CTL */ + ISVCE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : ISVCE_CMD_CTL_SET_GOP_PARAMS */ + ISVCE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** indicates the presence of aspect_ratio */ + UWORD8 u1_aspect_ratio_info_present_flag; + + /** specifies the aspect ratio of the luma samples */ + UWORD8 u1_aspect_ratio_idc; + + /** width of the luma samples. user dependent */ + UWORD16 u2_sar_width; + + /** Height of the luma samples. 
user dependent */ + UWORD16 u2_sar_height; + + /** if 1, specifies that the overscan_appropriate_flag is present + * if 0, the preferred display method for the video signal is unspecified */ + UWORD8 u1_overscan_info_present_flag; + + /** if 1,indicates that the cropped decoded pictures output + * are suitable for display using overscan */ + UWORD8 u1_overscan_appropriate_flag; + + /** if 1 specifies that video_format, video_full_range_flag and + * colour_description_present_flag are present */ + UWORD8 u1_video_signal_type_present_flag; + + /** pal, secam, ntsc, ... */ + UWORD8 u1_video_format; + + /** indicates the black level and range of the luma and chroma signals */ + UWORD8 u1_video_full_range_flag; + + /** if 1,specifies that colour_primaries, transfer_characteristics + * and matrix_coefficients are present */ + UWORD8 u1_colour_description_present_flag; + + /** indicates the chromaticity coordinates of the source primaries */ + UWORD8 u1_colour_primaries; + + /** indicates the opto-electronic transfer characteristic of the source picture */ + UWORD8 u1_transfer_characteristics; + + /** the matrix coefficients used in deriving luma and chroma signals + * from the green, blue, and red primaries */ + UWORD8 u1_matrix_coefficients; + + /** if 1, specifies that chroma_sample_loc_type_top_field and + * chroma_sample_loc_type_bottom_field are present */ + UWORD8 u1_chroma_loc_info_present_flag; + + /** location of chroma samples */ + UWORD8 u1_chroma_sample_loc_type_top_field; + + UWORD8 u1_chroma_sample_loc_type_bottom_field; + + /** Indicates the presence of the num_units_in_ticks, time_scale flag */ + UWORD8 u1_vui_timing_info_present_flag; + + /** Number of units that correspond to one increment of the + * clock. 
Indicates the resolution */ + UWORD32 u4_vui_num_units_in_tick; + + /** The number of time units that pass in one second */ + UWORD32 u4_vui_time_scale; + + /** Flag indicating that time difference between two frames is a constant */ + UWORD8 u1_fixed_frame_rate_flag; + + /** Indicates the presence of NAL HRD parameters */ + UWORD8 u1_nal_hrd_parameters_present_flag; + + /** Indicates the presence of VCL HRD parameters */ + UWORD8 u1_vcl_hrd_parameters_present_flag; + + /** Specifies the HRD operational mode */ + UWORD8 u1_low_delay_hrd_flag; + + /** Indicates presence of SEI messages which include pic_struct syntax element */ + UWORD8 u1_pic_struct_present_flag; + + /** 1, specifies that the following cvs bitstream restriction parameters are present */ + UWORD8 u1_bitstream_restriction_flag; + + /** if 0, indicates that no pel outside the pic boundaries and + * no sub-pels derived using pels outside the pic boundaries is used for inter prediction */ + UWORD8 u1_motion_vectors_over_pic_boundaries_flag; + + /** Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units + * associated with any coded picture */ + UWORD8 u1_max_bytes_per_pic_denom; + + /** Indicates an upper bound for the number of bits of coding_unit() data */ + UWORD8 u1_max_bits_per_mb_denom; + + /** Indicate the maximum absolute value of a decoded horizontal MV component + * in quarter-pel luma units */ + UWORD8 u1_log2_max_mv_length_horizontal; + + /** Indicate the maximum absolute value of a decoded vertical MV component + * in quarter-pel luma units */ + UWORD8 u1_log2_max_mv_length_vertical; + + /** Max number of frames that are not synchronized in display and decode order */ + UWORD8 u1_num_reorder_frames; + + /** specifies required size of the HRD DPB in units of frame buffers */ + UWORD8 u1_max_dec_frame_buffering; + + } isvce_vui_ip_t; + + typedef struct isvce_vui_op_t + { + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 
u4_error_code; + } isvce_vui_op_t; + + /*****************************************************************************/ + /* Video control Set SEI MDCV params */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_sei_mdcv_params_ip_t + { + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : ISVCE_CMD_VIDEO_CTL */ + ISVCE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : ISVCE_CMD_CTL_SET_SEI_MDCV_PARAMS */ + ISVCE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** mastering display color volume info present flag */ + UWORD8 u1_sei_mdcv_params_present_flag; + + /** Array to store the display_primaries_x values */ + UWORD16 au2_display_primaries_x[3]; + + /** Array to store the display_primaries_y values */ + UWORD16 au2_display_primaries_y[3]; + + /** Variable to store the white point x value */ + UWORD16 u2_white_point_x; + + /** Variable to store the white point y value */ + UWORD16 u2_white_point_y; + + /** Variable to store the max display mastering luminance value */ + UWORD32 u4_max_display_mastering_luminance; + + /** Variable to store the min display mastering luminance value */ + UWORD32 u4_min_display_mastering_luminance; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + + } isvce_ctl_set_sei_mdcv_params_ip_t; + + typedef struct isvce_ctl_set_sei_mdcv_params_op_t + { + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + } isvce_ctl_set_sei_mdcv_params_op_t; + + /*****************************************************************************/ + /* Video control Set SEI CLL params */ + /*****************************************************************************/ + typedef struct 
isvce_ctl_set_sei_cll_params_ip_t + { + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : ISVCE_CMD_VIDEO_CTL */ + ISVCE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : ISVCE_CMD_CTL_SET_SEI_CLL_PARAMS */ + ISVCE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** content light level info present flag */ + UWORD8 u1_sei_cll_params_present_flag; + + /** The maximum pixel intensity of all samples */ + UWORD16 u2_max_content_light_level; + + /** The average pixel intensity of all samples */ + UWORD16 u2_max_pic_average_light_level; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + + } isvce_ctl_set_sei_cll_params_ip_t; + + typedef struct isvce_ctl_set_sei_cll_params_op_t + { + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + } isvce_ctl_set_sei_cll_params_op_t; + + /*****************************************************************************/ + /* Video control Set SEI AVE params */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_sei_ave_params_ip_t + { + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : ISVCE_CMD_VIDEO_CTL */ + ISVCE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : ISVCE_CMD_CTL_SET_SEI_AVE_PARAMS */ + ISVCE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** ambient viewing environment info present flag */ + UWORD8 u1_sei_ave_params_present_flag; + + /** specifies the environmental illuminance of the ambient viewing + * environment */ + UWORD32 u4_ambient_illuminance; + + /** specify the normalized x chromaticity coordinates of the + * environmental ambient light in the nominal viewing environment */ + UWORD16 u2_ambient_light_x; + + /** specify the normalized y
chromaticity coordinates of the + * environmental ambient light in the nominal viewing environment */ + UWORD16 u2_ambient_light_y; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + + } isvce_ctl_set_sei_ave_params_ip_t; + + typedef struct isvce_ctl_set_sei_ave_params_op_t + { + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + } isvce_ctl_set_sei_ave_params_op_t; + + /*****************************************************************************/ + /* Video control Set SEI CCV params */ + /*****************************************************************************/ + typedef struct isvce_ctl_set_sei_ccv_params_ip_t + { + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : ISVCE_CMD_VIDEO_CTL */ + ISVCE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : ISVCE_CMD_CTL_SET_SEI_CCV_PARAMS */ + ISVCE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** content color volume info present flag */ + UWORD8 u1_sei_ccv_params_present_flag; + + /** Flag used to control persistence of CCV SEI messages */ + UWORD8 u1_ccv_cancel_flag; + + /** specifies the persistence of the CCV SEI message for the + * current layer */ + UWORD8 u1_ccv_persistence_flag; + + /** specifies the presence of syntax elements ccv_primaries_x + * and ccv_primaries_y */ + UWORD8 u1_ccv_primaries_present_flag; + + /** specifies that the syntax element ccv_min_luminance_value + * is present */ + UWORD8 u1_ccv_min_luminance_value_present_flag; + + /** specifies that the syntax element ccv_max_luminance_value + * is present */ + UWORD8 u1_ccv_max_luminance_value_present_flag; + + /** specifies that the syntax element ccv_avg_luminance_value + * is present */ + UWORD8 u1_ccv_avg_luminance_value_present_flag; + + /** 
shall be equal to 0 in bitstreams conforming to this version. + * Other values for reserved_zero_2bits are reserved for future use */ + UWORD8 u1_ccv_reserved_zero_2bits; + + /** specify the normalized x chromaticity coordinates of the colour + * primary component c of the nominal content colour volume */ + WORD32 ai4_ccv_primaries_x[3]; + + /** specify the normalized y chromaticity coordinates of the colour + * primary component c of the nominal content colour volume */ + WORD32 ai4_ccv_primaries_y[3]; + + /** specifies the normalized minimum luminance value */ + UWORD32 u4_ccv_min_luminance_value; + + /** specifies the normalized maximum luminance value */ + UWORD32 u4_ccv_max_luminance_value; + + /** specifies the normalized average luminance value */ + UWORD32 u4_ccv_avg_luminance_value; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + + } isvce_ctl_set_sei_ccv_params_ip_t; + + typedef struct isvce_ctl_set_sei_ccv_params_op_t + { + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + } isvce_ctl_set_sei_ccv_params_op_t; + + /* The enum values should not have greater than 8 bits as this is assigned to WORD8 */ + typedef enum IV_MB_TYPE_T + { + INTRA16x16 = 0, + INTRA4x4, + INTER16x16 + } IV_MB_TYPE_T; + + /*****************************************************************************/ + /* Pic info structures */ + /*****************************************************************************/ + typedef struct isvce_pic_info1_t + { + /** Qp */ + UWORD32 u4_qp; + + /** Pic Type */ + IV_PICTURE_CODING_TYPE_T e_frame_type; + + } isvce_pic_info1_t; + + /*****************************************************************************/ + /* MB info structures */ + 
/*****************************************************************************/ + typedef struct isvce_mv_t + { + /** MV X */ + WORD16 i2_mv_x; + + /** MV Y */ + WORD16 i2_mv_y; + } isvce_mv_t; + + typedef struct isvce_mb_info1_t + { + /** Intra / Inter */ + WORD8 i1_mb_type; + + union + { + isvce_mv_t as_mv[1]; + + /** Intra mode */ + WORD8 ai1_intra_mode[1]; + }; + } isvce_mb_info1_t; + + typedef struct isvce_mb_info2_t + { + /** Intra / Inter */ + WORD8 i1_mb_type; + + /** SAD */ + UWORD16 u2_sad; + + union + { + isvce_mv_t as_mv[1]; + + /** Intra mode */ + WORD8 ai1_intra_mode[1]; + }; + + } isvce_mb_info2_t; + + typedef struct isvce_mb_info3_t + { + /** Intra / Inter */ + WORD8 i1_mb_type; + + union + { + isvce_mv_t as_mv[4]; + + /** Intra mode */ + WORD8 ai1_intra_mode[16]; + }; + + } isvce_mb_info3_t; + + typedef struct isvce_mb_info4_t + { + /** Intra / Inter */ + WORD8 i1_mb_type; + + /** Intra Mode */ + WORD8 i1_intra_mode; + + /** SAD */ + UWORD16 u2_sad; + + union + { + isvce_mv_t as_mv[16]; + + /** Intra mode */ + WORD8 ai1_intra_mode[16]; + }; + + } isvce_mb_info4_t; + + /* Add any new structures to the following union. 
It is used to calculate the + * max size needed for allocation of memory */ + typedef struct isvce_api_mb_info_t + { + union + { + isvce_mb_info1_t s_mb_info1; + isvce_mb_info2_t s_mb_info2; + isvce_mb_info3_t s_mb_info3; + isvce_mb_info4_t s_mb_info4; + }; + } isvce_api_mb_info_t; + + typedef struct isvce_pic_info2_t + { + /** Qp */ + UWORD32 u4_qp; + + /** Pic Type */ + IV_PICTURE_CODING_TYPE_T e_frame_type; + + /** Disable deblock level (0: Enable completely, 3: Disable completely) */ + UWORD32 u4_disable_deblock_level; + + } isvce_pic_info2_t; + + typedef struct isvce_api_cmds_t + { + ISVCE_API_COMMAND_TYPE_T e_cmd; + + ISVCE_CONTROL_API_COMMAND_TYPE_T e_ctl_cmd; + } isvce_api_cmds_t; + + extern IV_STATUS_T isvce_api_function(iv_obj_t *ps_handle, void *pv_api_ip, void *pv_api_op, + isvce_api_cmds_t *ps_iv_api_cmds); + +#ifdef __cplusplus +} /* closing brace for extern "C" */ +#endif + +#endif diff --git a/encoder/svc/isvce_api.c b/encoder/svc/isvce_api.c new file mode 100644 index 0000000..52a583b --- /dev/null +++ b/encoder/svc/isvce_api.c @@ -0,0 +1,6054 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_api.c +* +* @brief +* Contains api function definitions for H264 encoder +* +* @author +* ittiam +* +* @par List of Functions: +* - api_check_struct_sanity() +* - isvce_codec_update_config() +* - isvce_set_default_params() +* - isvce_init() +* - isvce_get_num_rec() +* - isvce_fill_num_mem_rec() +* - isvce_init_mem_rec() +* - isvce_retrieve_memrec() +* - isvce_set_flush_mode() +* - isvce_get_buf_info() +* - isvce_set_dimensions() +* - isvce_set_frame_rate() +* - isvce_set_bit_rate() +* - isvce_set_frame_type() +* - isvce_set_qp() +* - isvce_set_enc_mode() +* - isvce_set_vbv_params() +* - isvc_set_air_params() +* - isvc_set_me_params() +* - isvc_set_ipe_params() +* - isvc_set_gop_params() +* - isvc_set_profile_params() +* - isvc_set_deblock_params() +* - isvce_set_num_cores() +* - isvce_reset() +* - isvce_ctl() +* - isvce_api_function() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ih264_typedefs.h" +/* Dependencies of ih264_buf_mgr.h */ +/* Dependencies of ih264_list.h */ +#include "ih264_error.h" +/* Dependencies of ih264_common_tables.h */ +#include "ih264_defs.h" +#include "ih264_structs.h" +#include "ih264_buf_mgr.h" +#include "ih264_common_tables.h" +#include "ih264_dpb_mgr.h" +#include "ih264_list.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +/* Dependencies of ih264e_cabac_structs.h */ +#include "ih264_cabac_tables.h" +/* Dependencies of ime_structs.h */ +#include "ime_defs.h" +#include "ime_distortion_metrics.h" +/* Dependencies of 
ih264e_structs.h */ +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +/* Dependencies of ih264e_bitstream.h */ +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ih264e_cabac_structs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ime_statistics.h" +#include "ime_structs.h" +/* Dependencies of 'ih264e_utils.h' */ +#include "ih264e_defs.h" +#include "ih264e_rc_mem_interface.h" +#include "ih264e_structs.h" +#include "ih264e_utils.h" +#include "ih264e_version.h" +#include "ime.h" +#include "isvce.h" +#include "isvce_cabac.h" +#include "isvce_deblk.h" +#include "isvce_defs.h" +#include "isvce_downscaler.h" +#include "isvce_encode.h" +#include "isvce_encode_header.h" +#include "isvce_ibl_eval.h" +#include "isvce_ilp_mv.h" +#include "isvce_intra_modes_eval.h" +#include "isvce_me.h" +#include "isvce_platform_macros.h" +#include "isvce_rate_control.h" +#include "isvce_rc_mem_interface.h" +#include "isvce_residual_pred.h" +#include "isvce_sub_pic_rc.h" +#include "isvce_utils.h" + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* Used to test arguments for corresponding API call +* +* @par Description: +* For each command the arguments are validated +* +* @param[in] ps_handle +* Codec handle at API level +* +* @param[in] pv_api_ip +* Pointer to input structure +* +* @param[out] pv_api_op +* Pointer to output structure +* +* @returns 
error status +* +* @remarks none +* +******************************************************************************* +*/ +static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, void *pv_api_ip, void *pv_api_op, + isvce_api_cmds_t *ps_iv_api_cmds) +{ + WORD32 i, j; + + /* output structure expected by the api call */ + UWORD32 *pu4_api_op = pv_api_op; + + ISVCE_API_COMMAND_TYPE_T e_cmd = ps_iv_api_cmds->e_cmd; + ISVCE_CONTROL_API_COMMAND_TYPE_T e_ctl_cmd = ps_iv_api_cmds->e_ctl_cmd; + + if(NULL == pv_api_op || NULL == pv_api_ip) + { + return (IV_FAIL); + } + + /* set error code */ + pu4_api_op[1] = 0; + + /* error checks on handle */ + switch(e_cmd) + { + case ISVCE_CMD_GET_NUM_MEM_REC: + case ISVCE_CMD_FILL_NUM_MEM_REC: + { + break; + } + + case ISVCE_CMD_INIT: + { + if(ps_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_HANDLE_NULL; + return IV_FAIL; + } + + if(ps_handle->u4_size != sizeof(iv_obj_t)) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_RETRIEVE_MEMREC: + case ISVCE_CMD_VIDEO_CTL: + case ISVCE_CMD_VIDEO_ENCODE: + { + if(ps_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_HANDLE_NULL; + return IV_FAIL; + } + + if(ps_handle->u4_size != sizeof(iv_obj_t)) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_handle->pv_fxns != isvce_api_function) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_API_FUNCTION_PTR_NULL; + return IV_FAIL; + } + + if(ps_handle->pv_codec_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_INVALID_CODEC_HANDLE; + return IV_FAIL; + } + + break; + } + default: + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 
1) |= IVE_ERR_INVALID_API_CMD; + + return IV_FAIL; + } + } + + /* error checks on input output structures */ + switch(e_cmd) + { + case ISVCE_CMD_GET_NUM_MEM_REC: + { + isvce_num_mem_rec_ip_t *ps_ip = pv_api_ip; + isvce_num_mem_rec_op_t *ps_op = pv_api_op; + + ps_op->s_ive_op.u4_error_code = 0; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_num_mem_rec_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_IP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_num_mem_rec_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_OP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + break; + } + case ISVCE_CMD_FILL_NUM_MEM_REC: + { + isvce_fill_mem_rec_ip_t *ps_ip = pv_api_ip; + isvce_fill_mem_rec_op_t *ps_op = pv_api_op; + + iv_mem_rec_t *ps_mem_rec = NULL; + + WORD32 max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd); + WORD32 max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht); + + ps_op->s_ive_op.u4_error_code = 0; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_fill_mem_rec_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_IP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_fill_mem_rec_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_OP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(max_wd < MIN_WD || max_wd > MAX_WD) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(max_ht < MIN_HT || max_ht > MAX_HT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + /* 
verify number of mem rec ptr */ + if(NULL == ps_ip->s_ive_ip.ps_mem_rec) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL; + return (IV_FAIL); + } + + /* verify number of mem records */ + if(ps_ip->s_ive_ip.u4_num_mem_rec != ISVCE_MEM_REC_CNT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT; + return IV_FAIL; + } + + /* check mem records sizes are correct */ + ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec; + for(i = 0; i < ISVCE_MEM_REC_CNT; i++) + { + if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + + break; + } + case ISVCE_CMD_INIT: + { + isvce_init_ip_t *ps_ip = pv_api_ip; + isvce_init_op_t *ps_op = pv_api_op; + + iv_mem_rec_t *ps_mem_rec = NULL; + + WORD32 max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd); + WORD32 max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht); + WORD32 wd = ALIGN16(ps_ip->u4_wd); + WORD32 ht = ALIGN16(ps_ip->u4_ht); + + ps_op->s_ive_op.u4_error_code = 0; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_init_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_IP_INIT_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_init_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_OP_INIT_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(max_wd < MIN_WD || max_wd > MAX_WD) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(max_ht < MIN_HT || max_ht > MAX_HT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + 
ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_max_ref_cnt > MAX_REF_PIC_CNT || + ps_ip->s_ive_ip.u4_max_ref_cnt < MIN_REF_PIC_CNT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_max_reorder_cnt != 0) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED; + return (IV_FAIL); + } + + if((ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_10) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_1B) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_11) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_12) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_13) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_20) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_21) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_22) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_30) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_31) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_32) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_40) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_41) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_42) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_50) && + (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_51)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420P) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420P) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= 
IH264E_RECON_CHROMA_FORMAT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if((ps_ip->s_ive_ip.e_rc_mode != IVE_RC_NONE) && + (ps_ip->s_ive_ip.e_rc_mode != IVE_RC_STORAGE) && + (ps_ip->s_ive_ip.e_rc_mode != IVE_RC_CBR_NON_LOW_DELAY)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_RATE_CONTROL_MODE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_max_framerate > DEFAULT_MAX_FRAMERATE) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_FRAME_RATE_NOT_SUPPORTED; + return (IV_FAIL); + } + + for(i = 0; i < ps_ip->s_svc_inp_params.u1_num_spatial_layers; i++) + { + if(ps_ip->pu4_max_bitrate[i] > DEFAULT_MAX_BITRATE) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_BITRATE_NOT_SUPPORTED; + return (IV_FAIL); + } + } + + if(ps_ip->s_ive_ip.u4_num_bframes > SVC_MAX_NUM_BFRAMES) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_BFRAMES_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_num_bframes && (ps_ip->s_ive_ip.u4_max_ref_cnt < 2)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_BFRAMES_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.e_content_type != IV_PROGRESSIVE) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_CONTENT_TYPE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_max_srch_rng_x > DEFAULT_MAX_SRCH_RANGE_X) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_max_srch_rng_y > DEFAULT_MAX_SRCH_RANGE_Y) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= 
IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.e_slice_mode != IVE_SLICE_MODE_NONE) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_SLICE_TYPE_INPUT_INVALID; + return (IV_FAIL); + } + + if(NULL == ps_ip->s_ive_ip.ps_mem_rec) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL; + return (IV_FAIL); + } + + /* verify number of mem records */ + if(ps_ip->s_ive_ip.u4_num_mem_rec != ISVCE_MEM_REC_CNT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT; + return (IV_FAIL); + } + + ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec; + + /* check memrecords sizes are correct */ + for(i = 0; i < ((WORD32) ps_ip->s_ive_ip.u4_num_mem_rec); i++) + { + if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + /* check memrecords pointers are not NULL */ + if(ps_mem_rec[i].pv_base == NULL) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_MEM_REC_BASE_POINTER_NULL; + return IV_FAIL; + } + } + + /* verify memtabs for overlapping regions */ + { + void *start[ISVCE_MEM_REC_CNT]; + void *end[ISVCE_MEM_REC_CNT]; + + start[0] = (ps_mem_rec[0].pv_base); + end[0] = ((UWORD8 *) ps_mem_rec[0].pv_base) + ps_mem_rec[0].u4_mem_size - 1; + + for(i = 1; i < ISVCE_MEM_REC_CNT; i++) + { + /* This array is populated to check memtab overlap */ + start[i] = (ps_mem_rec[i].pv_base); + end[i] = ((UWORD8 *) ps_mem_rec[i].pv_base) + ps_mem_rec[i].u4_mem_size - 1; + + for(j = 0; j < i; j++) + { + if((start[i] >= start[j]) && (start[i] <= end[j])) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + 
ps_op->s_ive_op.u4_error_code |= IVE_ERR_MEM_REC_OVERLAP_ERR; + return IV_FAIL; + } + + if((end[i] >= start[j]) && (end[i] <= end[j])) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_MEM_REC_OVERLAP_ERR; + return IV_FAIL; + } + + if((start[i] < start[j]) && (end[i] > end[j])) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_MEM_REC_OVERLAP_ERR; + return IV_FAIL; + } + } + } + } + + /* re-validate mem records with init config */ + { + /* mem records */ + iv_mem_rec_t s_mem_rec_ittiam_api[ISVCE_MEM_REC_CNT]; + + /* api interface structs */ + isvce_fill_mem_rec_ip_t s_ip; + isvce_fill_mem_rec_op_t s_op; + + /* error status */ + IV_STATUS_T e_status; + + /* temp var */ + WORD32 i; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_FILL_NUM_MEM_REC, ISVCE_CMD_CT_NA}; + + s_ip.s_ive_ip.u4_size = sizeof(isvce_fill_mem_rec_ip_t); + s_op.s_ive_op.u4_size = sizeof(isvce_fill_mem_rec_op_t); + + s_ip.s_ive_ip.ps_mem_rec = s_mem_rec_ittiam_api; + s_ip.s_ive_ip.u4_max_wd = max_wd; + s_ip.s_ive_ip.u4_max_ht = max_ht; + s_ip.u4_wd = wd; + s_ip.u4_ht = ht; + s_ip.s_ive_ip.u4_num_mem_rec = ps_ip->s_ive_ip.u4_num_mem_rec; + s_ip.s_ive_ip.u4_max_level = ps_ip->s_ive_ip.u4_max_level; + s_ip.s_ive_ip.u4_max_ref_cnt = ps_ip->s_ive_ip.u4_max_ref_cnt; + s_ip.s_ive_ip.u4_max_reorder_cnt = ps_ip->s_ive_ip.u4_max_reorder_cnt; + s_ip.s_ive_ip.e_color_format = ps_ip->s_ive_ip.e_inp_color_fmt; + s_ip.s_ive_ip.u4_max_srch_rng_x = ps_ip->s_ive_ip.u4_max_srch_rng_x; + s_ip.s_ive_ip.u4_max_srch_rng_y = ps_ip->s_ive_ip.u4_max_srch_rng_y; + + s_ip.s_svc_inp_params = ps_ip->s_svc_inp_params; + + for(i = 0; i < ISVCE_MEM_REC_CNT; i++) + { + s_mem_rec_ittiam_api[i].u4_size = sizeof(iv_mem_rec_t); + } + + /* fill mem records */ + e_status = isvce_api_function(NULL, (void *) &s_ip, (void *) &s_op, &s_api_cmds); + + if(IV_FAIL == e_status) + { + ps_op->s_ive_op.u4_error_code = 
s_op.s_ive_op.u4_error_code; + return (IV_FAIL); + } + + /* verify mem records */ + for(i = 0; i < ISVCE_MEM_REC_CNT; i++) + { + if(ps_mem_rec[i].u4_mem_size < s_mem_rec_ittiam_api[i].u4_mem_size) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_MEM_REC_INSUFFICIENT_SIZE; + + return IV_FAIL; + } + + if(ps_mem_rec[i].u4_mem_alignment != s_mem_rec_ittiam_api[i].u4_mem_alignment) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_MEM_REC_ALIGNMENT_ERR; + + return IV_FAIL; + } + + if(ps_mem_rec[i].e_mem_type != s_mem_rec_ittiam_api[i].e_mem_type) + { + UWORD32 check = IV_SUCCESS; + UWORD32 diff = + s_mem_rec_ittiam_api[i].e_mem_type - ps_mem_rec[i].e_mem_type; + + if((ps_mem_rec[i].e_mem_type <= IV_EXTERNAL_CACHEABLE_SCRATCH_MEM) && + (s_mem_rec_ittiam_api[i].e_mem_type >= + IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM)) + { + check = IV_FAIL; + } + + if(3 != (s_mem_rec_ittiam_api[i].e_mem_type % 4)) + { + /* It is not IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM or + * IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM */ + + if((diff < 1) || (diff > 3)) + { + /* Difference between 1 and 3 is okay for all cases other than + * the two filtered with the MOD condition above */ + check = IV_FAIL; + } + } + else + { + if(diff == 1) + { + /* This particular case is when codec asked for External + * Persistent, but got Internal Scratch */ + check = IV_FAIL; + } + if((diff != 2) && (diff != 3)) + { + check = IV_FAIL; + } + } + + if(check == IV_FAIL) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_MEM_REC_INCORRECT_TYPE; + + return IV_FAIL; + } + } + } + } + + break; + } + case ISVCE_CMD_RETRIEVE_MEMREC: + { + isvce_retrieve_mem_rec_ip_t *ps_ip = pv_api_ip; + isvce_retrieve_mem_rec_op_t *ps_op = pv_api_op; + + iv_mem_rec_t *ps_mem_rec = NULL; + + ps_op->s_ive_op.u4_error_code = 0; + + if(ps_ip->s_ive_ip.u4_size != 
sizeof(isvce_retrieve_mem_rec_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_retrieve_mem_rec_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(NULL == ps_ip->s_ive_ip.ps_mem_rec) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL; + return (IV_FAIL); + } + + ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec; + + /* check memrecords sizes are correct */ + for(i = 0; i < ISVCE_MEM_REC_CNT; i++) + { + if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + + break; + } + case ISVCE_CMD_VIDEO_ENCODE: + { + isvce_video_encode_ip_t *ps_ip = pv_api_ip; + isvce_video_encode_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_video_encode_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_IP_ENCODE_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_video_encode_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_OP_ENCODE_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + break; + } + case ISVCE_CMD_VIDEO_CTL: + { + switch(e_ctl_cmd) + { + case ISVCE_CMD_CTL_GET_ENC_FRAME_DIMENSIONS: + { + break; + } + case ISVCE_CMD_CTL_SETDEFAULT: + { + isvce_ctl_setdefault_ip_t *ps_ip = pv_api_ip; + isvce_ctl_setdefault_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_setdefault_ip_t)) + { + 
ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_setdefault_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_GETBUFINFO: + { + isvce_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip; + isvce_ctl_getbufinfo_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_getbufinfo_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_getbufinfo_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_ip->s_ive_ip.u4_max_wd < MIN_WD) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_max_ht < MIN_HT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if((ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420P) && + (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_422ILE) && + (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_UV) && + (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_VU)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED; + return (IV_FAIL); + } + + break; + } + case ISVCE_CMD_CTL_GETVERSION: + { + isvce_ctl_getversioninfo_ip_t *ps_ip = pv_api_ip; + isvce_ctl_getversioninfo_op_t *ps_op = pv_api_op; + 
+ if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_getversioninfo_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_getversioninfo_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_ip->s_ive_ip.pu1_version == NULL) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IVE_ERR_CTL_GET_VERSION_BUFFER_IS_NULL; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_FLUSH: + { + isvce_ctl_flush_ip_t *ps_ip = pv_api_ip; + isvce_ctl_flush_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_flush_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_flush_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_RESET: + { + isvce_ctl_reset_ip_t *ps_ip = pv_api_ip; + isvce_ctl_reset_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_reset_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_RESET_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_reset_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_RESET_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_SET_NUM_CORES: + { + 
isvce_ctl_set_num_cores_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_num_cores_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_num_cores_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_num_cores_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.u4_num_cores < 1) || + (ps_ip->s_ive_ip.u4_num_cores > MAX_NUM_CORES)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_NUM_CORES; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_SET_DIMENSIONS: + { + isvce_codec_t *ps_codec = (isvce_codec_t *) (ps_handle->pv_codec_handle); + + isvce_ctl_set_dimensions_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_dimensions_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_dimensions_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_dimensions_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_ip->s_ive_ip.u4_wd < MIN_WD) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_wd > ps_codec->s_cfg.u4_max_wd) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + 
if(ps_ip->s_ive_ip.u4_ht < MIN_HT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_ht > ps_codec->s_cfg.u4_max_ht) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_wd & 1) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_ht & 1) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + break; + } + case ISVCE_CMD_CTL_SET_FRAMERATE: + { + isvce_ctl_set_frame_rate_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_frame_rate_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_frame_rate_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_frame_rate_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(((ps_ip->s_ive_ip.u4_src_frame_rate * 1000) > DEFAULT_MAX_FRAMERATE) || + ((ps_ip->s_ive_ip.u4_tgt_frame_rate * 1000) > DEFAULT_MAX_FRAMERATE)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_FRAME_RATE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if((ps_ip->s_ive_ip.u4_src_frame_rate == 0) || + (ps_ip->s_ive_ip.u4_tgt_frame_rate == 0)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_FRAME_RATE_NOT_SUPPORTED; + return (IV_FAIL); 
+ } + + if(ps_ip->s_ive_ip.u4_tgt_frame_rate > ps_ip->s_ive_ip.u4_src_frame_rate) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_TGT_FRAME_RATE_EXCEEDS_SRC_FRAME_RATE; + return (IV_FAIL); + } + + break; + } + case ISVCE_CMD_CTL_SET_BITRATE: + { + isvce_ctl_set_bitrate_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_bitrate_op_t *ps_op = pv_api_op; + + isvce_codec_t *ps_codec = (isvce_codec_t *) (ps_handle->pv_codec_handle); + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_bitrate_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_bitrate_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + if((ps_ip->pu4_target_bitrate[i] > DEFAULT_MAX_BITRATE) || + (ps_ip->pu4_target_bitrate[i] == 0)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_BITRATE_NOT_SUPPORTED; + return (IV_FAIL); + } + } + + break; + } + case ISVCE_CMD_CTL_SET_FRAMETYPE: + { + isvce_ctl_set_frame_type_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_frame_type_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_frame_type_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_frame_type_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + 
if((ps_ip->s_ive_ip.e_frame_type != IV_I_FRAME) && + (ps_ip->s_ive_ip.e_frame_type != IV_P_FRAME) && + (ps_ip->s_ive_ip.e_frame_type != IV_IDR_FRAME)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_FORCE_FRAME_INPUT; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_SET_ME_PARAMS: + { + isvce_codec_t *ps_codec = (isvce_codec_t *) (ps_handle->pv_codec_handle); + + isvce_ctl_set_me_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_me_params_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_me_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_me_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_ip->s_ive_ip.u4_me_speed_preset != DMND_SRCH) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_ME_SPEED_PRESET; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.u4_enable_hpel != 0) && + (ps_ip->s_ive_ip.u4_enable_hpel != 1)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_HALFPEL_OPTION; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.u4_enable_qpel != 0) && + (ps_ip->s_ive_ip.u4_enable_qpel != 1)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_QPEL_OPTION; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.u4_enable_fast_sad != 0)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_FAST_SAD_OPTION; + return IV_FAIL; + } + + if(ps_ip->s_ive_ip.u4_enable_alt_ref > 0) + { + 
ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_ALT_REF_OPTION; + return IV_FAIL; + } + + if(ps_ip->s_ive_ip.u4_srch_rng_x > ps_codec->s_cfg.u4_max_srch_rng_x) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if(ps_ip->s_ive_ip.u4_srch_rng_y > ps_codec->s_cfg.u4_max_srch_rng_y) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED; + return (IV_FAIL); + } + + break; + } + case ISVCE_CMD_CTL_SET_IPE_PARAMS: + { + isvce_ctl_set_ipe_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_ipe_params_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_ipe_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_ipe_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.u4_enable_intra_4x4 != 0) && + (ps_ip->s_ive_ip.u4_enable_intra_4x4 != 1)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INTRA4x4_OPTION; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_CONFIG) && + (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_SLOWEST) && + (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_NORMAL) && + (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_FAST) && + (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_HIGH_SPEED) && + (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_FASTEST)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + 
ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_ENC_SPEED_PRESET; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_SET_GOP_PARAMS: + { + isvce_ctl_set_gop_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_gop_params_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_gop_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_gop_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.u4_i_frm_interval < DEFAULT_MIN_INTRA_FRAME_RATE) || + (ps_ip->s_ive_ip.u4_i_frm_interval > DEFAULT_MAX_INTRA_FRAME_RATE)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INTRA_FRAME_INTERVAL; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.u4_idr_frm_interval < DEFAULT_MIN_INTRA_FRAME_RATE) || + (ps_ip->s_ive_ip.u4_idr_frm_interval > DEFAULT_MAX_INTRA_FRAME_RATE)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_IDR_FRAME_INTERVAL; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_SET_DEBLOCK_PARAMS: + { + isvce_ctl_set_deblock_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_deblock_params_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_deblock_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_deblock_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + 
IVE_ERR_OP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_0) && + (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_2) && + (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_3) && + (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_4)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_DEBLOCKING_TYPE_INPUT; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_SET_QP: + { + isvce_ctl_set_qp_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_qp_op_t *ps_op = pv_api_op; + + isvce_codec_t *ps_codec = (isvce_codec_t *) (ps_handle->pv_codec_handle); + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_qp_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_qp_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + if((ps_ip->pu4_i_qp_max[i] > MAX_H264_QP) || + (ps_ip->pu4_p_qp_max[i] > MAX_H264_QP) || + (ps_ip->pu4_b_qp_max[i] > MAX_H264_QP)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_MAX_FRAME_QP; + return IV_FAIL; + } + + /* We donot support QP < 4 */ + if((((WORD32) ps_ip->pu4_i_qp_min[i]) < MIN_H264_QP) || + ((WORD32) ps_ip->pu4_p_qp_min[i] < MIN_H264_QP) || + (((WORD32) ps_ip->pu4_b_qp_min[i]) < MIN_H264_QP) || + (ps_ip->pu4_i_qp_min[i] > ps_ip->pu4_i_qp_max[i]) || + (ps_ip->pu4_p_qp_min[i] > ps_ip->pu4_p_qp_max[i]) || + (ps_ip->pu4_b_qp_min[i] > ps_ip->pu4_b_qp_max[i])) + { + 
ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_MIN_FRAME_QP; + return IV_FAIL; + } + + if((ps_ip->pu4_i_qp[i] > ps_ip->pu4_i_qp_max[i]) || + (ps_ip->pu4_p_qp[i] > ps_ip->pu4_p_qp_max[i]) || + (ps_ip->pu4_b_qp[i] > ps_ip->pu4_b_qp_max[i])) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INIT_QP; + return IV_FAIL; + } + + if((ps_ip->pu4_i_qp[i] < ps_ip->pu4_i_qp_min[i]) || + (ps_ip->pu4_p_qp[i] < ps_ip->pu4_p_qp_min[i]) || + (ps_ip->pu4_b_qp[i] < ps_ip->pu4_b_qp_min[i])) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INIT_QP; + return IV_FAIL; + } + } + + break; + } + case ISVCE_CMD_CTL_SET_VUI_PARAMS: + { + isvce_vui_ip_t *ps_ip = pv_api_ip; + isvce_vui_op_t *ps_op = pv_api_op; + + if(ps_ip->u4_size != sizeof(isvce_vui_ip_t)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IVE_ERR_IP_CTL_SET_VUI_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size != sizeof(isvce_vui_op_t)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IVE_ERR_OP_CTL_SET_VUI_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_SET_SEI_MDCV_PARAMS: + { + isvce_ctl_set_sei_mdcv_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_sei_mdcv_params_op_t *ps_op = pv_api_op; + + if(ps_ip->u4_size != sizeof(isvce_ctl_set_sei_mdcv_params_ip_t)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IVE_ERR_IP_CTL_SET_SEI_MDCV_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size != sizeof(isvce_ctl_set_sei_mdcv_params_op_t)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IVE_ERR_OP_CTL_SET_SEI_MDCV_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->u1_sei_mdcv_params_present_flag != 0) && + 
(ps_ip->u1_sei_mdcv_params_present_flag) != 1) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_MDCV_PARAMS; + return IV_FAIL; + } + + if(1 == ps_ip->u1_sei_mdcv_params_present_flag) + { + /* Check values for u2_display_primaries_x and + * u2_display_primaries_y */ + for(i = 0; i < 3; i++) + { + if((ps_ip->au2_display_primaries_x[i] > + DISPLAY_PRIMARIES_X_UPPER_LIMIT) || + (ps_ip->au2_display_primaries_x[i] < + DISPLAY_PRIMARIES_X_LOWER_LIMIT) || + ((ps_ip->au2_display_primaries_x[i] % + DISPLAY_PRIMARIES_X_DIVISION_FACTOR) != 0)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_MDCV_PARAMS; + return IV_FAIL; + } + + if((ps_ip->au2_display_primaries_y[i] > + DISPLAY_PRIMARIES_Y_UPPER_LIMIT) || + (ps_ip->au2_display_primaries_y[i] < + DISPLAY_PRIMARIES_Y_LOWER_LIMIT) || + ((ps_ip->au2_display_primaries_y[i] % + DISPLAY_PRIMARIES_Y_DIVISION_FACTOR) != 0)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_MDCV_PARAMS; + return IV_FAIL; + } + } + + if((ps_ip->u2_white_point_x > WHITE_POINT_X_UPPER_LIMIT) || + (ps_ip->u2_white_point_x < WHITE_POINT_X_LOWER_LIMIT) || + ((ps_ip->u2_white_point_x % WHITE_POINT_X_DIVISION_FACTOR) != 0)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_MDCV_PARAMS; + return IV_FAIL; + } + + if((ps_ip->u2_white_point_y > WHITE_POINT_Y_UPPER_LIMIT) || + (ps_ip->u2_white_point_y < WHITE_POINT_Y_LOWER_LIMIT) || + ((ps_ip->u2_white_point_y % WHITE_POINT_Y_DIVISION_FACTOR) != 0)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_MDCV_PARAMS; + return IV_FAIL; + } + + if((ps_ip->u4_max_display_mastering_luminance > + MAX_DISPLAY_MASTERING_LUMINANCE_UPPER_LIMIT) || + (ps_ip->u4_max_display_mastering_luminance < + MAX_DISPLAY_MASTERING_LUMINANCE_LOWER_LIMIT) || + 
((ps_ip->u4_max_display_mastering_luminance % + MAX_DISPLAY_MASTERING_LUMINANCE_DIVISION_FACTOR) != 0)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_MDCV_PARAMS; + return IV_FAIL; + } + + if((ps_ip->u4_min_display_mastering_luminance > + MIN_DISPLAY_MASTERING_LUMINANCE_UPPER_LIMIT) || + (ps_ip->u4_min_display_mastering_luminance < + MIN_DISPLAY_MASTERING_LUMINANCE_LOWER_LIMIT)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_MDCV_PARAMS; + return IV_FAIL; + } + + if(ps_ip->u4_max_display_mastering_luminance <= + ps_ip->u4_min_display_mastering_luminance) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_MDCV_PARAMS; + return IV_FAIL; + } + } + + break; + } + case ISVCE_CMD_CTL_SET_SEI_CLL_PARAMS: + { + isvce_ctl_set_sei_cll_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_sei_cll_params_op_t *ps_op = pv_api_op; + + if(ps_ip->u4_size != sizeof(isvce_ctl_set_sei_cll_params_ip_t)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IVE_ERR_IP_CTL_SET_SEI_CLL_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size != sizeof(isvce_ctl_set_sei_cll_params_op_t)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IVE_ERR_OP_CTL_SET_SEI_CLL_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->u1_sei_cll_params_present_flag != 0) && + (ps_ip->u1_sei_cll_params_present_flag != 1)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CLL_PARAMS; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_SET_SEI_AVE_PARAMS: + { + isvce_ctl_set_sei_ave_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_sei_ave_params_op_t *ps_op = pv_api_op; + + if(ps_ip->u4_size != sizeof(isvce_ctl_set_sei_ave_params_ip_t)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= 
IVE_ERR_IP_CTL_SET_SEI_AVE_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size != sizeof(isvce_ctl_set_sei_ave_params_op_t)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IVE_ERR_OP_CTL_SET_SEI_AVE_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->u1_sei_ave_params_present_flag != 0) && + (ps_ip->u1_sei_ave_params_present_flag != 1)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_AVE_PARAMS; + return IV_FAIL; + } + + if(1 == ps_ip->u1_sei_ave_params_present_flag) + { + if((0 == ps_ip->u4_ambient_illuminance)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_AVE_PARAMS; + return IV_FAIL; + } + + if(ps_ip->u2_ambient_light_x > AMBIENT_LIGHT_X_UPPER_LIMIT) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_AVE_PARAMS; + return IV_FAIL; + } + + if(ps_ip->u2_ambient_light_y > AMBIENT_LIGHT_Y_UPPER_LIMIT) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_AVE_PARAMS; + return IV_FAIL; + } + } + + break; + } + case ISVCE_CMD_CTL_SET_SEI_CCV_PARAMS: + { + isvce_ctl_set_sei_ccv_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_sei_ccv_params_op_t *ps_op = pv_api_op; + + if(ps_ip->u4_size != sizeof(isvce_ctl_set_sei_ccv_params_ip_t)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IVE_ERR_IP_CTL_SET_SEI_CCV_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size != sizeof(isvce_ctl_set_sei_ccv_params_op_t)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IVE_ERR_OP_CTL_SET_SEI_CCV_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->u1_sei_ccv_params_present_flag != 0) && + (ps_ip->u1_sei_ccv_params_present_flag != 1)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= 
IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + + if(1 == ps_ip->u1_sei_ccv_params_present_flag) + { + if((ps_ip->u1_ccv_cancel_flag != 0) && (ps_ip->u1_ccv_cancel_flag != 1)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + + if(0 == ps_ip->u1_ccv_cancel_flag) + { + if((ps_ip->u1_ccv_persistence_flag != 0) && + (ps_ip->u1_ccv_persistence_flag != 1)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + if((ps_ip->u1_ccv_primaries_present_flag != 0) && + (ps_ip->u1_ccv_primaries_present_flag != 1)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + if((ps_ip->u1_ccv_min_luminance_value_present_flag != 0) && + (ps_ip->u1_ccv_min_luminance_value_present_flag != 1)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + if((ps_ip->u1_ccv_max_luminance_value_present_flag != 0) && + (ps_ip->u1_ccv_max_luminance_value_present_flag != 1)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + if((ps_ip->u1_ccv_avg_luminance_value_present_flag != 0) && + (ps_ip->u1_ccv_avg_luminance_value_present_flag != 1)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + if((ps_ip->u1_ccv_primaries_present_flag == 0) && + (ps_ip->u1_ccv_min_luminance_value_present_flag == 0) && + (ps_ip->u1_ccv_max_luminance_value_present_flag == 0) && + (ps_ip->u1_ccv_avg_luminance_value_present_flag == 0)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + + if((ps_ip->u1_ccv_reserved_zero_2bits != 0)) + 
{ + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + + if(1 == ps_ip->u1_ccv_primaries_present_flag) + { + for(i = 0; i < 3; i++) + { + if((ps_ip->ai4_ccv_primaries_x[i] > + CCV_PRIMARIES_X_UPPER_LIMIT) || + (ps_ip->ai4_ccv_primaries_x[i] < + CCV_PRIMARIES_X_LOWER_LIMIT)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + + if((ps_ip->ai4_ccv_primaries_y[i] > + CCV_PRIMARIES_Y_UPPER_LIMIT) || + (ps_ip->ai4_ccv_primaries_y[i] < + CCV_PRIMARIES_Y_LOWER_LIMIT)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + } + } + + if((1 == ps_ip->u1_ccv_min_luminance_value_present_flag) && + (1 == ps_ip->u1_ccv_avg_luminance_value_present_flag)) + { + if((ps_ip->u4_ccv_avg_luminance_value < + ps_ip->u4_ccv_min_luminance_value)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + } + + if((1 == ps_ip->u1_ccv_min_luminance_value_present_flag) && + (1 == ps_ip->u1_ccv_max_luminance_value_present_flag)) + { + if((ps_ip->u4_ccv_max_luminance_value < + ps_ip->u4_ccv_min_luminance_value)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + } + if((1 == ps_ip->u1_ccv_avg_luminance_value_present_flag) && + (1 == ps_ip->u1_ccv_max_luminance_value_present_flag)) + { + if((ps_ip->u4_ccv_max_luminance_value < + ps_ip->u4_ccv_avg_luminance_value)) + { + ps_op->u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= IH264E_INVALID_SEI_CCV_PARAMS; + return IV_FAIL; + } + } + } + } + + break; + } + case ISVCE_CMD_CTL_SET_ENC_MODE: + { + isvce_ctl_set_enc_mode_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_enc_mode_op_t *ps_op = pv_api_op; + + 
if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_enc_mode_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_enc_mode_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.e_enc_mode != IVE_ENC_MODE_HEADER) && + (ps_ip->s_ive_ip.e_enc_mode != IVE_ENC_MODE_PICTURE)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_ENC_OPERATION_MODE; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_SET_VBV_PARAMS: + { + isvce_ctl_set_vbv_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_vbv_params_op_t *ps_op = pv_api_op; + + isvce_codec_t *ps_codec = (isvce_codec_t *) (ps_handle->pv_codec_handle); + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_vbv_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_vbv_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + if((ps_ip->pu4_vbv_buffer_delay[i] < DEFAULT_MIN_BUFFER_DELAY) || + (ps_ip->pu4_vbv_buffer_delay[i] > DEFAULT_MAX_BUFFER_DELAY)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_BUFFER_DELAY; + return IV_FAIL; + } + } + + break; + } + case ISVCE_CMD_CTL_SET_AIR_PARAMS: + { + isvce_ctl_set_air_params_ip_t *ps_ip = pv_api_ip; + 
isvce_ctl_set_air_params_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_air_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_air_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_NONE) && + (ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_CYCLIC) && + (ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_RANDOM)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_AIR_MODE; + return IV_FAIL; + } + + if(ps_ip->s_ive_ip.u4_air_refresh_period == 0) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_AIR_REFRESH_PERIOD; + return IV_FAIL; + } + + break; + } + case ISVCE_CMD_CTL_SET_PROFILE_PARAMS: + { + isvce_ctl_set_profile_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_profile_params_op_t *ps_op = pv_api_op; + + if(ps_ip->s_ive_ip.u4_size != sizeof(isvce_ctl_set_profile_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->s_ive_op.u4_size != sizeof(isvce_ctl_set_profile_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_ip->s_ive_ip.e_profile != IV_PROFILE_BASE && + ps_ip->s_ive_ip.e_profile != IV_PROFILE_MAIN) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= 
IH264E_PROFILE_NOT_SUPPORTED; + return IV_FAIL; + } + + if(ps_ip->s_ive_ip.u4_entropy_coding_mode > 1) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_ENTROPY_CODING_MODE; + return IV_FAIL; + } + + break; + } + default: + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_SUB_CMD; + return IV_FAIL; + } + } + + break; + } + default: + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_CMD; + return IV_FAIL; + } + } + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Sets default encoder config parameters +* +* @par Description: +* Sets default dynamic parameters. Will be called in isvce_init() to ensure +* that even if set_params is not called, codec continues to work +* +* @param[in] ps_cfg +* Pointer to encoder config params +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static WORD32 isvce_set_default_params(isvce_cfg_params_t *ps_cfg) +{ + WORD32 ret = IV_SUCCESS; + WORD32 i; + + ps_cfg->u4_max_wd = MAX_WD; + ps_cfg->u4_max_ht = MAX_HT; + ps_cfg->u4_max_ref_cnt = MAX_REF_CNT; + ps_cfg->u4_max_reorder_cnt = MAX_REF_CNT; + ps_cfg->u4_max_level = DEFAULT_MAX_LEVEL; + ps_cfg->e_inp_color_fmt = IV_YUV_420SP_UV; + ps_cfg->u4_enable_recon = DEFAULT_RECON_ENABLE; + ps_cfg->e_recon_color_fmt = IV_YUV_420P; + ps_cfg->u4_enc_speed_preset = IVE_FASTEST; + ps_cfg->e_rc_mode = DEFAULT_RC; + ps_cfg->u4_max_framerate = DEFAULT_MAX_FRAMERATE; + ps_cfg->u4_num_bframes = DEFAULT_MAX_NUM_BFRAMES; + ps_cfg->e_content_type = IV_PROGRESSIVE; + ps_cfg->u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X; + ps_cfg->u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y; + ps_cfg->e_slice_mode = IVE_SLICE_MODE_NONE; + ps_cfg->u4_slice_param = DEFAULT_SLICE_PARAM; + ps_cfg->e_arch = 
isvce_default_arch(); + ps_cfg->e_soc = SOC_GENERIC; + ps_cfg->u4_disp_wd = MAX_WD; + ps_cfg->u4_disp_ht = MAX_HT; + ps_cfg->u4_wd = MAX_WD; + ps_cfg->u4_ht = MAX_HT; + ps_cfg->u4_src_frame_rate = DEFAULT_SRC_FRAME_RATE; + ps_cfg->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE; + ps_cfg->e_frame_type = IV_NA_FRAME; + ps_cfg->e_enc_mode = IVE_ENC_MODE_DEFAULT; + ps_cfg->e_air_mode = DEFAULT_AIR_MODE; + ps_cfg->u4_air_refresh_period = DEFAULT_AIR_REFRESH_PERIOD; + ps_cfg->u4_num_cores = DEFAULT_NUM_CORES; + ps_cfg->u4_me_speed_preset = DEFAULT_ME_SPEED_PRESET; + ps_cfg->u4_enable_hpel = DEFAULT_HPEL; + ps_cfg->u4_enable_qpel = DEFAULT_QPEL; + ps_cfg->u4_enable_intra_4x4 = DEFAULT_I4; + ps_cfg->u4_enable_intra_8x8 = DEFAULT_I8; + ps_cfg->u4_enable_intra_16x16 = DEFAULT_I16; + ps_cfg->u4_enable_fast_sad = DEFAULT_ENABLE_FAST_SAD; + ps_cfg->u4_enable_satqd = DEFAULT_ENABLE_SATQD; + ps_cfg->i4_min_sad = (ps_cfg->u4_enable_satqd == DEFAULT_ENABLE_SATQD) + ? DEFAULT_MIN_SAD_ENABLE + : DEFAULT_MIN_SAD_DISABLE; + ps_cfg->u4_srch_rng_x = DEFAULT_SRCH_RNG_X; + ps_cfg->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y; + ps_cfg->u4_i_frm_interval = DEFAULT_I_INTERVAL; + ps_cfg->u4_idr_frm_interval = DEFAULT_IDR_INTERVAL; + ps_cfg->u4_disable_deblock_level = DEFAULT_DISABLE_DEBLK_LEVEL; + ps_cfg->e_profile = DEFAULT_PROFILE; + ps_cfg->u4_timestamp_low = 0; + ps_cfg->u4_timestamp_high = 0; + ps_cfg->u4_is_valid = 1; + ps_cfg->e_cmd = ISVCE_CMD_CT_NA; + ps_cfg->i4_wd_mbs = ps_cfg->u4_max_wd >> 4; + ps_cfg->i4_ht_mbs = ps_cfg->u4_max_ht >> 4; + ps_cfg->u4_entropy_coding_mode = CAVLC; + ps_cfg->u4_weighted_prediction = 0; + ps_cfg->u4_pic_info_type = 0; + ps_cfg->u4_isvce_mb_info_type = 0; + ps_cfg->s_vui.u1_video_signal_type_present_flag = 1; + ps_cfg->s_vui.u1_colour_description_present_flag = 1; + + ps_cfg->b_nalu_info_export_enable = false; + + for(i = 0; i < MAX_NUM_SPATIAL_LAYERS; i++) + { + ps_cfg->au4_i_qp_max[i] = MAX_H264_QP; + ps_cfg->au4_i_qp_min[i] = MIN_H264_QP; + ps_cfg->au4_i_qp[i] = 
DEFAULT_I_QP; + ps_cfg->au4_p_qp_max[i] = MAX_H264_QP; + ps_cfg->au4_p_qp_min[i] = MIN_H264_QP; + ps_cfg->au4_p_qp[i] = DEFAULT_P_QP; + ps_cfg->au4_b_qp_max[i] = MAX_H264_QP; + ps_cfg->au4_b_qp_min[i] = MIN_H264_QP; + ps_cfg->au4_b_qp[i] = DEFAULT_B_QP; + } + + ps_cfg->s_svc_params.d_spatial_res_ratio = 2.0; + ps_cfg->s_svc_params.u1_num_spatial_layers = 1; + ps_cfg->s_svc_params.u1_num_temporal_layers = 1; + + return ret; +} + +/** +******************************************************************************* +* +* @brief +* Initialize encoder context. This will be called by init_mem_rec and during +* codec reset +* +* @par Description: +* Initializes the context +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static WORD32 isvce_init(isvce_codec_t *ps_codec) +{ + /* enc config param set */ + isvce_cfg_params_t *ps_cfg = &(ps_codec->s_cfg); + + UWORD32 i; + + /* coded pic count */ + ps_codec->i4_poc = 0; + + /* Number of API calls to encode are made */ + ps_codec->i4_encode_api_call_cnt = -1; + + /* Indicates no header has been generated yet */ + ps_codec->u4_header_generated = 0; + + /* Number of pictures encoded */ + ps_codec->i4_pic_cnt = -1; + + /* Number of threads created */ + ps_codec->i4_proc_thread_cnt = 0; + + /* ctl mutex init */ + ithread_mutex_init(ps_codec->pv_ctl_mutex); + + /* Set encoder chroma format */ + ps_codec->e_codec_color_format = + (ps_cfg->e_inp_color_fmt == IV_YUV_420SP_VU) ? 
IV_YUV_420SP_VU : IV_YUV_420SP_UV; + + /* Number of continuous frames where deblocking was disabled */ + ps_codec->u4_disable_deblock_level_cnt = 0; + + /* frame num */ + ps_codec->i4_frame_num = 0; + + /* set the current frame type to I frame, since we are going to start + * encoding*/ + ps_codec->force_curr_frame_type = IV_NA_FRAME; + + /* idr_pic_id */ + ps_codec->i4_idr_pic_id = -1; + + /* Flush mode */ + ps_codec->i4_flush_mode = 0; + + /* Encode header mode */ + ps_codec->i4_header_mode = 0; + + /* Encode generate header */ + ps_codec->i4_gen_header = 0; + + /* To signal successful completion of init */ + ps_codec->i4_init_done = 1; + + /* To signal that at least one picture was decoded */ + ps_codec->i4_first_pic_done = 0; + + /* Reset Codec */ + ps_codec->i4_reset_flag = 0; + + /* Current error code */ + ps_codec->i4_error_code = IH264E_SUCCESS; + + /* threshold residue */ + ps_codec->u4_thres_resi = 1; + + /* inter gating enable */ + ps_codec->u4_inter_gate = 0; + + /* entropy mutex init */ + ithread_mutex_init(ps_codec->pv_entropy_mutex); + + /* Process thread created status */ + memset(ps_codec->ai4_process_thread_created, 0, sizeof(ps_codec->ai4_process_thread_created)); + + /* Number of MBs processed together */ + ps_codec->i4_proc_nmb = 8; + + /* Previous POC msb */ + ps_codec->i4_prev_poc_msb = 0; + + /* Previous POC lsb */ + ps_codec->i4_prev_poc_lsb = -1; + + /* max Previous POC lsb */ + ps_codec->i4_max_prev_poc_lsb = -1; + + /* sps, pps status */ + { + sps_t *ps_sps = ps_codec->ps_sps_base; + pps_t *ps_pps = ps_codec->ps_pps_base; + + for(i = 0; i < MAX_SPS_CNT; i++) + { + ps_sps->i1_sps_valid = 0; + ps_sps++; + } + + for(i = 0; i < MAX_PPS_CNT; i++) + { + ps_pps->i1_pps_valid = 0; + ps_pps++; + } + } + + { + WORD32 max_mb_rows; + UWORD32 u4_ht, u4_wd; + + isvce_get_svc_compliant_dimensions(ps_cfg->s_svc_params.u1_num_spatial_layers, + ps_cfg->s_svc_params.d_spatial_res_ratio, ps_cfg->u4_wd, + ps_cfg->u4_ht, &u4_wd, &u4_ht); + + /* frame 
dimensions */ + u4_ht = ALIGN16(u4_ht); + max_mb_rows = u4_ht / MB_SIZE; + + { + WORD32 clz; + + WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS; + + /* Use next power of two number of entries*/ + clz = CLZ(num_jobs); + num_jobs = 1 << (32 - clz); + + /* init process jobq */ + ps_codec->pv_proc_jobq = + ih264_list_init(ps_codec->pv_proc_jobq_buf, ps_codec->i4_proc_jobq_buf_size, + num_jobs, sizeof(job_t), 10); + RETURN_IF((ps_codec->pv_proc_jobq == NULL), IV_FAIL); + ih264_list_reset(ps_codec->pv_proc_jobq); + + /* init entropy jobq */ + ps_codec->pv_entropy_jobq = + ih264_list_init(ps_codec->pv_entropy_jobq_buf, ps_codec->i4_entropy_jobq_buf_size, + num_jobs, sizeof(job_t), 10); + RETURN_IF((ps_codec->pv_entropy_jobq == NULL), IV_FAIL); + ih264_list_reset(ps_codec->pv_entropy_jobq); + } + } + + /* Update the jobq context to all the threads */ + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + ps_codec->as_process[i].pv_proc_jobq = ps_codec->pv_proc_jobq; + ps_codec->as_process[i].pv_entropy_jobq = ps_codec->pv_entropy_jobq; + + /* i4_id always stays between 0 and MAX_PROCESS_THREADS */ + ps_codec->as_process[i].i4_id = i % MAX_PROCESS_THREADS; + ps_codec->as_process[i].ps_codec = ps_codec; + + ps_codec->as_process[i].s_entropy.pv_proc_jobq = ps_codec->pv_proc_jobq; + ps_codec->as_process[i].s_entropy.pv_entropy_jobq = ps_codec->pv_entropy_jobq; + ps_codec->as_process[i].s_entropy.i4_abs_pic_order_cnt = -1; + } + + /* Initialize MV Bank buffer manager */ + ps_codec->pv_svc_au_data_store_mgr = + ih264_buf_mgr_init(ps_codec->pv_svc_au_data_store_mgr_base); + + /* Initialize Picture buffer manager for reference buffers*/ + ps_codec->pv_ref_buf_mgr = ih264_buf_mgr_init(ps_codec->pv_ref_buf_mgr_base); + + /* Initialize Picture buffer manager for input buffers*/ + ps_codec->pv_inp_buf_mgr = ih264_buf_mgr_init(ps_codec->pv_inp_buf_mgr_base); + + /* Initialize buffer manager for output buffers*/ + ps_codec->pv_out_buf_mgr = ih264_buf_mgr_init(ps_codec->pv_out_buf_mgr_base); + + 
/* buffer cnt in buffer manager */ + ps_codec->i4_inp_buf_cnt = 0; + ps_codec->i4_out_buf_cnt = 0; + ps_codec->i4_ref_buf_cnt = 0; + + ps_codec->ps_pic_buf = ps_codec->ps_pic_buf_base; + memset(ps_codec->ps_pic_buf, 0, BUF_MGR_MAX_CNT * sizeof(svc_au_buf_t)); + + for(i = 0; i < BUF_MGR_MAX_CNT; i++) + { + isvce_svc_au_buf_init(&((svc_au_buf_t *) ps_codec->ps_pic_buf)[i], &ps_cfg->s_svc_params); + } + + /* Initialize dpb manager */ + ih264_dpb_mgr_init((dpb_mgr_t *) ps_codec->pv_dpb_mgr); + + memset(ps_codec->as_ref_set, 0, sizeof(ps_codec->as_ref_set)); + for(i = 0; i < (sizeof(ps_codec->as_ref_set) / sizeof(ps_codec->as_ref_set[0])); i++) + { + ps_codec->as_ref_set[i].i4_pic_cnt = -1; + } + + /* fn ptr init */ + isvce_init_function_ptr(ps_codec); + + /* reset status flags */ + for(i = 0; i < MAX_CTXT_SETS; i++) + { + ps_codec->au4_entropy_thread_active[i] = 0; + ps_codec->ai4_pic_cnt[i] = -1; + + ps_codec->s_rate_control.pre_encode_skip[i] = 0; + ps_codec->s_rate_control.post_encode_skip[i] = 0; + } + + for(i = 0; i < ps_cfg->s_svc_params.u1_num_spatial_layers; i++) + { + ps_codec->s_rate_control.ai4_num_intra_in_prev_frame[i] = 0; + ps_codec->s_rate_control.ai4_avg_activity[i] = 0; + } + + ps_codec->i4_max_num_reference_frames = + MIN((gas_ih264_lvl_tbl[ih264e_get_lvl_idx(ps_codec->s_cfg.u4_max_level)].u4_max_dpb_size / + (ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs)), + 16); + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Gets number of memory records required by the codec +* +* @par Description: +* Gets codec memory requirements +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns status +* +* @remarks +* +******************************************************************************* +*/ +static WORD32 isvce_get_num_rec(void *pv_api_ip, void *pv_api_op) +{ + /* api call I/O 
structures */ + isvce_num_mem_rec_op_t *ps_op = pv_api_op; + + UNUSED(pv_api_ip); + + ps_op->s_ive_op.u4_num_mem_rec = ISVCE_MEM_REC_CNT; + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Fills memory records of the codec +* +* @par Description: +* Fills codec memory requirements +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static WORD32 isvce_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op) +{ + isvce_fill_mem_rec_ip_t *ps_ip = pv_api_ip; + isvce_fill_mem_rec_op_t *ps_op = pv_api_op; + + WORD32 level; + WORD32 num_reorder_frames; + WORD32 num_ref_frames; + + WORD32 no_of_mem_rec; + iv_mem_rec_t *ps_mem_rec_base, *ps_mem_rec; + + WORD32 max_wd_luma, max_ht_luma; + WORD32 max_mb_rows, max_mb_cols, max_mb_cnt; + UWORD32 u4_wd, u4_ht; + + WORD32 i; + + IV_STATUS_T status = IV_SUCCESS; + + num_reorder_frames = ps_ip->s_ive_ip.u4_max_reorder_cnt; + num_ref_frames = ps_ip->s_ive_ip.u4_max_ref_cnt; + + ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec; + no_of_mem_rec = ps_ip->s_ive_ip.u4_num_mem_rec; + + isvce_get_svc_compliant_dimensions(ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio, ps_ip->u4_wd, + ps_ip->u4_ht, &u4_wd, &u4_ht); + + /* frame dimensions */ + max_ht_luma = ALIGN16(u4_ht); + max_wd_luma = ALIGN16(u4_wd); + max_mb_rows = max_ht_luma / MB_SIZE; + max_mb_cols = max_wd_luma / MB_SIZE; + max_mb_cnt = max_mb_rows * max_mb_cols; + + /* profile / level info */ + level = ih264e_get_min_level(max_ht_luma, max_wd_luma); + + /* Validate params */ + ps_op->s_ive_op.u4_error_code |= isvce_svc_au_props_validate( + &ps_ip->s_svc_inp_params, ps_ip->u4_wd, ps_ip->u4_ht, u4_wd, u4_ht); + + if(ps_op->s_ive_op.u4_error_code != 
IV_SUCCESS) + { + return IV_FAIL; + } + + if((level < MIN_LEVEL) || (level > MAX_LEVEL)) + { + ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED; + level = MAX_LEVEL; + } + + if(num_ref_frames > MAX_REF_CNT) + { + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED; + num_ref_frames = MAX_REF_CNT; + } + + if(num_reorder_frames > MAX_REF_CNT) + { + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED; + num_reorder_frames = MAX_REF_CNT; + } + + /* Set all memory records as persistent and alignment as 128 by default */ + ps_mem_rec = ps_mem_rec_base; + for(i = 0; i < no_of_mem_rec; i++) + { + ps_mem_rec->u4_mem_alignment = 128; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + ps_mem_rec++; + } + + /************************************************************************ + * Request memory for h264 encoder handle * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_IV_OBJ]; + { + ps_mem_rec->u4_mem_size = sizeof(iv_obj_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_IV_OBJ, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for h264 encoder context * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_CODEC]; + { + ps_mem_rec->u4_mem_size = sizeof(isvce_codec_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_CODEC, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for CABAC context * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_CABAC]; + { + ps_mem_rec->u4_mem_size = sizeof(isvce_cabac_ctxt_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_CABAC, ps_mem_rec->u4_mem_size); + + 
/************************************************************************ + * Request memory for CABAC MB info * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_CABAC_MB_INFO]; + { + ps_mem_rec->u4_mem_size = ((max_mb_cols + 1) + 1) * sizeof(isvce_mb_info_ctxt_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_CABAC_MB_INFO, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for entropy context * + * In multi core encoding, each row is assumed to be launched on a * + * thread. The rows below can only start after its neighbors are coded * + * The status of an mb coded/uncoded is signaled via entropy map. * + * 1. One word32 to store skip run cnt * + * 2. mb entropy map (mb status entropy coded/uncoded). The size* + * of the entropy map is max mb cols. Further allocate one * + * more additional row to evade checking for row -1. * + * 3. size of bit stream buffer to store bit stream ctxt. * + * 4. Entropy coding is dependent on nnz coefficient count for * + * the neighbor blocks. 
It is sufficient to maintain one row * + * worth of nnz as entropy for lower row waits on entropy map* + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_ENTROPY]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size of skip mb run */ + total_size += sizeof(WORD32); + total_size = ALIGN8(total_size); + + /* size in bytes to store entropy status of an entire frame */ + total_size += (max_mb_cols * max_mb_rows); + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + total_size = ALIGN128(total_size); + + /* size of bit stream buffer */ + total_size += sizeof(bitstrm_t); + total_size = ALIGN128(total_size); + + /* size of bit stream buffer */ + total_size += sizeof(bitstrm_t); + total_size = ALIGN128(total_size); + + /* top nnz luma */ + total_size += (max_mb_cols * 4 * sizeof(UWORD8)); + total_size = ALIGN128(total_size); + + /* top nnz cbcr */ + total_size += (max_mb_cols * 4 * sizeof(UWORD8)); + total_size = ALIGN128(total_size); + + /* ps_mb_qp_ctxt */ + total_size += ALIGN128(sizeof(mb_qp_ctxt_t)); + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_ENTROPY, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * The residue coefficients that needs to be entropy coded are packed * + * at a buffer space by the proc threads. The entropy thread shall * + * read from the buffer space, unpack them and encode the same. The * + * buffer space required to pack a row of mbs are as follows. * + * Assuming transform_8x8_flag is disabled, * + * In the worst case, 1 mb contains 1 dc 4x4 luma sub block, followed * + * by 16 ac 4x4 luma sub blocks, 2 dc chroma 2x2 sub blocks, followed * + * by 8 ac 4x4 chroma sub blocks. 
* + * For the sake of simplicity we assume that all sub blocks are of * + * type 4x4. The packing of each 4x4 is depicted by the structure * + * tu_sblk_coeff_data_t * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_MB_COEFF_DATA]; + { + /* temp var */ + WORD32 size = 0; + + /* size of coeff data of 1 mb */ + size += sizeof(tu_sblk_coeff_data_t) * MAX_4x4_SUBBLKS; + + /* size of coeff data of 1 row of mb's */ + size *= max_mb_cols; + + /* align to avoid any false sharing across threads */ + size = ALIGN64(size); + + /* size for one full frame */ + size *= max_mb_rows; + + /* size of each proc buffer set (ping, pong) */ + size *= MAX_CTXT_SETS; + + ps_mem_rec->u4_mem_size = size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_MB_COEFF_DATA, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * while encoding an mb, the mb header data is signaled to the entropy* + * thread by writing to a buffer space. 
the size of header data per mb * + * is assumed to be 40 bytes * + * TODO: revisit this inference * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_MB_HEADER_DATA]; + { + /* temp var */ + WORD32 size; + + /* size per MB */ + size = sizeof(isvce_mb_hdr_t); + + /* size for 1 row of mbs */ + size = size * max_mb_cols; + + /* align to avoid any false sharing across threads */ + size = ALIGN64(size); + + /* size for one full frame */ + size *= max_mb_rows; + + /* size of each proc buffer set (ping, pong) */ + size *= MAX_CTXT_SETS; + + ps_mem_rec->u4_mem_size = size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_MB_HEADER_DATA, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * While encoding inter slices, to compute the cost of encoding an mb * + * with the mv's at hand, we employ the expression cost = sad + lambda * + * x mv_bits. Here mv_bits is the total number of bits taken to represe* + * nt the mv in the stream. The mv bits for all the possible mv are * + * stored in the look up table. The mem record for this look up table * + * is given below. 
* + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_MVBITS]; + { + /* max srch range x */ + UWORD32 u4_srch_range_x = ps_ip->s_ive_ip.u4_max_srch_rng_x; + + /* max srch range y */ + UWORD32 u4_srch_range_y = ps_ip->s_ive_ip.u4_max_srch_rng_y; + + /* max srch range */ + UWORD32 u4_max_srch_range = MAX(u4_srch_range_x, u4_srch_range_y); + + /* due to subpel */ + u4_max_srch_range <<= 2; + + /* due to mv on either direction */ + u4_max_srch_range = (u4_max_srch_range << 1); + + /* due to pred mv + zero */ + u4_max_srch_range = (u4_max_srch_range << 1) + 1; + + u4_max_srch_range = ALIGN128(u4_max_srch_range); + + ps_mem_rec->u4_mem_size = u4_max_srch_range; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_MVBITS, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for SPS * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SPS]; + { + ps_mem_rec->u4_mem_size = MAX_SPS_CNT * sizeof(sps_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_SPS, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for PPS * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_PPS]; + { + ps_mem_rec->u4_mem_size = MAX_PPS_CNT * sizeof(pps_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_PPS, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for SVC NALU Extension * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SVC_NALU_EXT]; + { + /* 2 implies allocation for NAL_PREFIX and NAL_CODED_SLICE_EXTENSION */ + ps_mem_rec->u4_mem_size = + 2 * MAX_CTXT_SETS * 
SVC_MAX_SLICE_HDR_CNT * sizeof(svc_nalu_ext_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_SVC_NALU_EXT, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for subset SPS * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SUBSET_SPS]; + { + ps_mem_rec->u4_mem_size = + MAX_SPS_CNT * ps_ip->s_svc_inp_params.u1_num_spatial_layers * sizeof(subset_sps_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_SUBSET_SPS, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for Slice Header * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SLICE_HDR]; + { + ps_mem_rec->u4_mem_size = MAX_CTXT_SETS * SVC_MAX_SLICE_HDR_CNT * sizeof(slice_header_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_SLICE_HDR, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for SVC Slice Header * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SVC_SLICE_HDR]; + { + ps_mem_rec->u4_mem_size = + MAX_CTXT_SETS * SVC_MAX_SLICE_HDR_CNT * sizeof(svc_slice_header_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_SVC_SLICE_HDR, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for Adaptive Intra Refresh * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_AIR_MAP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* intra coded map */ + total_size += max_mb_cnt; + total_size *= MAX_CTXT_SETS; + + /* mb refresh map */ + total_size += sizeof(UWORD16) * max_mb_cnt; + + /* 
alignment */ + total_size = ALIGN128(total_size); + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_AIR_MAP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * In multi slice encoding, this memory record helps tracking the start* + * of slice with reference to mb. * + * MEM RECORD for holding * + * 1. mb slice map * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SLICE_MAP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to slice index of all mbs of a frame */ + total_size = ALIGN64(max_mb_cnt); + + /* isvce_update_proc_ctxt can overread by 1 at the end */ + total_size += 1; + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_SLICE_MAP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold thread handles for each processing thread * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_THREAD_HANDLE]; + { + WORD32 handle_size = ithread_get_handle_size(); + + ps_mem_rec->u4_mem_size = MAX_PROCESS_THREADS * handle_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_THREAD_HANDLE, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold mutex for control calls * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_CTL_MUTEX]; + { + ps_mem_rec->u4_mem_size = ithread_get_mutex_lock_size(); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_CTL_MUTEX, ps_mem_rec->u4_mem_size); + + 
/************************************************************************ + * Request memory to hold mutex for entropy calls * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_ENTROPY_MUTEX]; + { + ps_mem_rec->u4_mem_size = ithread_get_mutex_lock_size(); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_ENTROPY_MUTEX, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold process jobs * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_PROC_JOBQ]; + { + /* One process job per row of MBs */ + /* Allocate for two pictures, so that wrap around can be handled easily */ + WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS; + + WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t)); + + ps_mem_rec->u4_mem_size = job_queue_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_PROC_JOBQ, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold entropy jobs * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_ENTROPY_JOBQ]; + { + /* One process job per row of MBs */ + /* Allocate for two pictures, so that wrap around can be handled easily */ + WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS; + + WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t)); + + ps_mem_rec->u4_mem_size = job_queue_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_ENTROPY_JOBQ, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * In multi core encoding, each row is assumed to be launched on a * + * thread. The rows below can only start after its neighbors are coded * + * The status of an mb coded/uncoded is signaled via proc map. 
* + * MEM RECORD for holding * + * 1. mb proc map (mb status core coded/uncoded) * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_PROC_MAP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_PROC_MAP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * mem record for holding a particular MB is deblocked or not * + * 1. mb deblk map (mb status deblocked/not deblocked) * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_DBLK_MAP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + total_size = ALIGN64(total_size); + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_DBLK_MAP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * mem record for holding a particular MB's me is done or not * + * 1. 
mb me map * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_ME_MAP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_ME_MAP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * size for holding dpb manager context * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_DPB_MGR]; + { + ps_mem_rec->u4_mem_size = sizeof(dpb_mgr_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_DPB_MGR, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * luma or chroma core coding involves mb estimation, error computation* + * between the estimated singnal and the actual signal, transform the * + * error, quantize the error, then inverse transform and inverse quant * + * ize the residue and add the result back to estimated signal. * + * To perform all these, a set of temporary buffers are needed. * + * MEM RECORD for holding scratch buffers * + * 1. prediction buffer used during mb mode analysis * + * 2 temp. reference buffer when intra 4x4 with rdopt on is * + * enabled * + * - when intra 4x4 is enabled, rdopt is on, to store the * + * reconstructed values and use them later this temp. buffer * + * is used. * + * 3. prediction buffer used during intra mode analysis * + * 4. prediction buffer used during intra 16x16 plane mode * + * analysis + * 5. prediction buffer used during intra chroma mode analysis * + * 6. 
prediction buffer used during intra chroma 16x16 plane * + * mode analysis + * 7. forward transform output buffer * + * - to store the error between estimated and the actual inp * + * ut and to store the fwd transformed quantized output * + * 8. forward transform output buffer * + * - when intra 4x4 is enabled, rdopt is on, to store the * + * fwd transform values and use them later this temp. buffer * + * is used. * + * 9. temporary buffer for inverse transform * + * - temporary buffer used in inverse transform and inverse * + * quantization * + * A. Buffers for holding half_x , half_y and half_xy planes * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_PROC_SCRATCH]; + { + WORD32 total_size = 0; + WORD32 i4_tmp_size; + + /* size to hold prediction buffer */ + total_size += sizeof(UWORD8) * 16 * 16; + total_size = ALIGN64(total_size); + + /* size to hold recon for intra 4x4 buffer */ + total_size += sizeof(UWORD8) * 16 * 16; + total_size = ALIGN64(total_size); + + /* prediction buffer intra 16x16 */ + total_size += sizeof(UWORD8) * 16 * 16; + total_size = ALIGN64(total_size); + + /* prediction buffer intra 16x16 plane*/ + total_size += sizeof(UWORD8) * 16 * 16; + total_size = ALIGN64(total_size); + + /* prediction buffer intra chroma*/ + total_size += sizeof(UWORD8) * 16 * 8; + total_size = ALIGN64(total_size); + + /* prediction buffer intra chroma plane*/ + total_size += sizeof(UWORD8) * 16 * 8; + total_size = ALIGN64(total_size); + + /* size to hold fwd transform output */ + total_size += sizeof(WORD16) * SIZE_TRANS_BUFF; + total_size = ALIGN64(total_size); + + /* size to hold fwd transform output */ + total_size += sizeof(WORD16) * SIZE_TRANS_BUFF; + total_size = ALIGN64(total_size); + + /* size to hold temporary data during inverse transform */ + total_size += sizeof(WORD32) * SIZE_TMP_BUFF_ITRANS; + total_size = ALIGN64(total_size); + + /* Buffers for holding half_x , half_y and half_xy 
planes */ + i4_tmp_size = sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT); + total_size += (ALIGN64(i4_tmp_size) * SUBPEL_BUFF_CNT); + + /* Allocate for each process thread */ + total_size *= MAX_PROCESS_CTXT; + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_PROC_SCRATCH, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * When transform_8x8_flag is disabled, the size of a sub block is * + * 4x4 and when the transform_8x8_flag is enabled the size of the sub * + * block is 8x8. The threshold matrix and the forward scaling list * + * is of the size of the sub block. * + * MEM RECORD for holding * + * 1. quantization parameters for plane y, cb, cr * + * - threshold matrix for quantization * + * - forward weight matrix * + * - satqd threshold matrix * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_QUANT_PARAM]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* quantization parameter list for planes y,cb and cr */ + total_size += ALIGN64(sizeof(quant_params_t)) * 3; + + /* size of threshold matrix for quantization + * (assuming the transform_8x8_flag is disabled). + * for all 3 planes */ + total_size += ALIGN64(sizeof(WORD16) * 4 * 4) * 3; + + /* size of forward weight matrix for quantization + * (assuming the transform_8x8_flag is disabled). 
+ * for all 3 planes */ + total_size += ALIGN64(sizeof(WORD16) * 4 * 4) * 3; + + /* Size for SATDQ threshold matrix for palnes y, cb and cr */ + total_size += ALIGN64(sizeof(UWORD16) * 9) * 3; + + total_size = ALIGN128(total_size); + + /* total size per each proc thread */ + total_size *= MAX_PROCESS_CTXT; + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_QUANT_PARAM, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * While computing blocking strength for the current mb, the csbp, mb * + * type for the neighboring mbs are necessary. memtab for storing top * + * row mbtype and csbp is evaluated here. * + * * + * when encoding intra 4x4 or intra 8x8 the submb types are estimated * + * and sent. The estimation is dependent on neighbor mbs. For this * + * store the top row sub mb types for intra mbs * + * * + * During motion vector prediction, the curr mb mv is predicted from * + * neigbors left, top, top right and sometimes top left depending on * + * the availability. The top and top right content is accessed from * + * the memtab specified below. * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_TOP_ROW_SYN_INFO]; + { + UWORD32 total_size = isvce_get_svc_nbr_info_buf_size( + ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio, u4_wd, u4_ht); + + total_size = ALIGN128(total_size); + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_TOP_ROW_SYN_INFO, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * When transform_8x8_flag is disabled, the mb is partitioned into * + * 4 sub blocks. This corresponds to 1 vertical left edge and 1 * + * vertical inner edge, 1 horizontal top edge and 1 horizontal * + * inner edge per mb. 
Further, When transform_8x8_flag is enabled, * + * the mb is partitioned in to 16 sub blocks. This corresponds to * + * 1 vertical left edge and 3 vertical inner edges, 1 horizontal top * + * edge and 3 horizontal inner edges per mb. * + * MEM RECORD for holding * + * 1. vertical edge blocking strength * + * 2. horizontal edge blocking strength * + * 3. mb qp * + * all are frame level * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_BS_QP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to store vertical edge bs, horizontal edge bs and qp of + * every mb*/ + WORD32 vert_bs_size, horz_bs_size, qp_size; + + /* vertical edge bs = total number of vertical edges * number of bytes per + * each edge */ + /* total num of v edges = total mb * 4 (assuming transform_8x8_flag = 0), + * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing + * bs */ + vert_bs_size = 2 * ALIGN64(max_mb_cnt * 4 * 4); + + /* horizontal edge bs = total number of horizontal edges * number of bytes + * per each edge */ + /* total num of h edges = total mb * 4 (assuming transform_8x8_flag = 0), + * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing + * bs */ + horz_bs_size = 2 * ALIGN64(max_mb_cnt * 4 * 4); + + /* qp of each mb requires 1 byte */ + qp_size = ALIGN64(max_mb_cnt); + + /* total size */ + total_size = vert_bs_size + horz_bs_size + qp_size; + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_BS_QP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * size for holding input pic buf * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_INP_PIC]; + { + ps_mem_rec->u4_mem_size = ih264_buf_mgr_size(); + 
} + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_INP_PIC, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * size for holding putput pic buf * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_OUT]; + { + ps_mem_rec->u4_mem_size = ih264_buf_mgr_size(); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_OUT, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Size for color space conversion * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_CSC]; + { + /* We need a total a memory for a single frame of 420 sp, ie + * (wd * ht) for luma and (wd * ht / 2) for chroma*/ + ps_mem_rec->u4_mem_size = MAX_CTXT_SETS * ((3 * max_ht_luma * max_wd_luma) >> 1); + /* Allocate an extra row, since inverse transform functions for + * chroma access(only read, not used) few extra bytes due to + * interleaved input + */ + ps_mem_rec->u4_mem_size += max_wd_luma; + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_CSC, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Size for holding pic_buf_t for each reference picture * + * Note this allocation is done for BUF_MGR_MAX_CNT instead of * + * MAX_DPB_SIZE or max_dpb_size for following reasons * + * max_dpb_size will be based on max_wd and max_ht * + * For higher max_wd and max_ht this number will be smaller than * + * MAX_DPB_SIZE But during actual initialization number of buffers * + * allocated can be more. 
* + * * + * Also to handle display depth application can allocate more than * + * what codec asks for in case of non-shared mode * + * Since this is only a structure allocation and not actual buffer * + * allocation, it is allocated for BUF_MGR_MAX_CNT entries * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_REF_PIC]; + { + ps_mem_rec->u4_mem_size = ih264_buf_mgr_size(); + ps_mem_rec->u4_mem_size += BUF_MGR_MAX_CNT * sizeof(svc_au_buf_t); + + /************************************************************************ + * Note: Number of luma samples is not max_wd * max_ht here, instead it * + * is set to maximum number of luma samples allowed at the given level. * + * This is done to ensure that any stream with width and height lesser * + * than max_wd and max_ht is supported. Number of buffers required can * + * be greater for lower width and heights at a given level and this * + * increased number of buffers might require more memory than what * + * max_wd and max_ht buffer would have required. Number of buffers is * + * doubled in order to return one frame at a time instead of sending * + * multiple outputs during dpb full case. Also note one extra buffer is * + * allocted to store current picture. * + * * + * Half-pel planes for each reference buffer are allocated along with * + * the reference buffer. So each reference buffer is 4 times the * + * required size. 
This way buffer management for the half-pel planes is * + * easier and while using the half-pel planes in MC, an offset can be * + * used from a single pointer * + ***********************************************************************/ + ps_mem_rec->u4_mem_size += + HPEL_PLANES_CNT * isvce_get_total_svc_au_buf_size(&ps_ip->s_svc_inp_params, + u4_wd * u4_ht, level, PAD_WD, PAD_HT, + num_ref_frames, num_reorder_frames); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_REF_PIC, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Size for holding svc_au_data_t for each MV Bank. * + * Note this allocation is done for BUF_MGR_MAX_CNT instead of * + * MAX_DPB_SIZE or max_dpb_size for following reasons * + * max_dpb_size will be based on max_wd and max_ht * + * For higher max_wd and max_ht this number will be smaller than * + * MAX_DPB_SIZE But during actual initialization number of buffers * + * allocated can be more. * + * * + * One extra MV Bank is needed to hold current pics MV bank. * + * Since this is only a structure allocation and not actual buffer * + * allocation, it is allocated for BUF_MGR_MAX_CNT entries * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_MVBANK]; + { + ps_mem_rec->u4_mem_size = ih264_buf_mgr_size(); + + /************************************************************************ + * Allocate for pu_map, isvce_enc_pu_t and pic_pu_idx for each MV bank * + * Note: Number of luma samples is not max_wd * max_ht here, instead it * + * is set to maximum number of luma samples allowed at the given level. * + * This is done to ensure that any stream with width and height lesser * + * than max_wd and max_ht is supported. 
Number of buffers required can * + * be greater for lower width and heights at a given level and this * + * increased number of buffers might require more memory than what * + * max_wd and max_ht buffer would have required Also note one extra * + * buffer is allocated to store current pictures MV bank. * + ***********************************************************************/ + + ps_mem_rec->u4_mem_size += BUF_MGR_MAX_CNT * sizeof(svc_au_data_t); + + ps_mem_rec->u4_mem_size += + (num_ref_frames + num_reorder_frames + ps_ip->s_svc_inp_params.u1_num_temporal_layers + + MAX_CTXT_SETS) * + isvce_get_total_svc_au_data_size(u4_wd * u4_ht, + ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_MVBANK, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold mem recs to be returned during retrieve call * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_BACKUP]; + { + ps_mem_rec->u4_mem_size = ISVCE_MEM_REC_CNT * sizeof(iv_mem_rec_t); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_BACKUP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * size for memory required by NMB info structs and buffer for storing * + * half pel plane * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_MB_INFO_NMB]; + { + /* Additional 4 bytes to allow use of '_mm_loadl_epi64' */ + ps_mem_rec->u4_mem_size = + MAX_PROCESS_CTXT * max_mb_cols * + (sizeof(isvce_mb_info_nmb_t) + (MB_SIZE * MB_SIZE + 4) * sizeof(UWORD8)); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_MB_INFO_NMB, ps_mem_rec->u4_mem_size); + + /* Buffers for storing SVC Spatial data */ + { + ps_mem_rec = 
&ps_mem_rec_base[ISVCE_MEM_SVC_SPAT_INP]; + + ps_mem_rec->u4_mem_size = + isvce_get_svc_inp_buf_size(ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio, u4_wd, u4_ht); + + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_SVC_SPAT_INP, ps_mem_rec->u4_mem_size); + } + + /* Buffer for storing Downscaler data */ + { + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_DOWN_SCALER]; + + ps_mem_rec->u4_mem_size = isvce_get_downscaler_data_size( + ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio, u4_wd, u4_ht); + + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_DOWN_SCALER, ps_mem_rec->u4_mem_size); + } + + { + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_SVC_ILP_DATA]; + + ps_mem_rec->u4_mem_size = + isvce_get_svc_ilp_buf_size(ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio, u4_wd, u4_ht); + + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_SVC_ILP_DATA, ps_mem_rec->u4_mem_size); + } + + { + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_SVC_ILP_MV_CTXT]; + + ps_mem_rec->u4_mem_size = + isvce_get_ilp_mv_ctxt_size(ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio, u4_wd, u4_ht); + + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_SVC_ILP_MV_CTXT, ps_mem_rec->u4_mem_size); + } + + { + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_SVC_RES_PRED_CTXT]; + + ps_mem_rec->u4_mem_size = isvce_get_svc_res_pred_ctxt_size( + ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio, u4_wd, u4_ht); + + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_SVC_RES_PRED_CTXT, + ps_mem_rec->u4_mem_size); + } + + { + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_SVC_INTRA_PRED_CTXT]; + + ps_mem_rec->u4_mem_size = isvce_get_svc_intra_pred_ctxt_size( + ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio, u4_wd, u4_ht); + + DEBUG("\nMemory record Id %d = %d \n", 
ISVCE_MEM_SVC_INTRA_PRED_CTXT, + ps_mem_rec->u4_mem_size); + } + + { + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_SVC_RC_UTILS_CTXT]; + + ps_mem_rec->u4_mem_size = isvce_get_rc_utils_data_size(); + + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_SVC_RC_UTILS_CTXT, + ps_mem_rec->u4_mem_size); + } + + { + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_SVC_SUB_PIC_RC_CTXT]; + + ps_mem_rec->u4_mem_size = isvce_get_sub_pic_rc_ctxt_size( + ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio, u4_wd, u4_ht); + + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_SVC_SUB_PIC_RC_CTXT, + ps_mem_rec->u4_mem_size); + } + +#if ENABLE_MODE_STAT_VISUALISER + { + ps_mem_rec = &ps_mem_rec_base[MEM_MODE_STAT_VISUALISER_BUF]; + + ps_mem_rec->u4_mem_size = isvce_get_msv_ctxt_size(u4_wd, u4_ht); + + DEBUG("\nMemory record Id %d = %d \n", MEM_MODE_STAT_VISUALISER_BUF, + ps_mem_rec->u4_mem_size); + } +#endif + + /************************************************************************ + * RC mem records * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_RC]; + { + isvce_get_rate_control_mem_tab(NULL, ps_mem_rec, FILL_MEMTAB); + } + DEBUG("\nMemory record Id %d = %d \n", ISVCE_MEM_REC_RC, ps_mem_rec->u4_mem_size); + + /* Each memtab size is aligned to next multiple of 128 bytes */ + /* This is to ensure all the memtabs start at different cache lines */ + ps_mem_rec = ps_mem_rec_base; + for(i = 0; i < ISVCE_MEM_REC_CNT; i++) + { + ps_mem_rec->u4_mem_size = ALIGN128(ps_mem_rec->u4_mem_size); + ps_mem_rec++; + } + + ps_op->s_ive_op.u4_num_mem_rec = ISVCE_MEM_REC_CNT; + + DEBUG("Num mem recs in fill call : %d\n", ps_op->s_ive_op.u4_num_mem_rec); + + return (status); +} + +/** +******************************************************************************* +* +* @brief +* Initializes from mem records passed to the codec +* +* @par Description: +* Initializes pointers based on mem records 
passed +* +* @param[in] ps_codec_obj +* Pointer to codec object at API level +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) +{ + /* api call I/O structures */ + isvce_init_ip_t *ps_ip = pv_api_ip; + isvce_init_op_t *ps_op = pv_api_op; + + /* mem records */ + iv_mem_rec_t *ps_mem_rec_base, *ps_mem_rec; + + /* codec variables */ + isvce_codec_t *ps_codec; + isvce_cabac_ctxt_t *ps_cabac; + isvce_mb_info_ctxt_t *ps_mb_map_ctxt_inc; + + isvce_cfg_params_t *ps_cfg; + + /* frame dimensions */ + WORD32 max_wd_luma, max_ht_luma; + WORD32 max_mb_rows, max_mb_cols, max_mb_cnt; + + /* temp var */ + WORD32 i, j; + WORD32 status = IV_SUCCESS; + + /* mem records */ + ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec; + + /* memset all allocated memory, except the first one. First buffer (i.e. 
i == MEM_REC_IV_OBJ) + is initialized by application before calling this init function */ + for(i = ISVCE_MEM_REC_CODEC; i < ISVCE_MEM_REC_CNT; i++) + { + ps_mem_rec = &ps_mem_rec_base[i]; + memset(ps_mem_rec->pv_base, 0, ps_mem_rec->u4_mem_size); + } + + /* Init mem records */ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_CODEC]; + { + ps_codec_obj->pv_codec_handle = ps_mem_rec->pv_base; + ps_codec = (isvce_codec_t *) (ps_codec_obj->pv_codec_handle); + } + /* Init mem records_cabac ctxt */ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_CABAC]; + { + ps_cabac = (isvce_cabac_ctxt_t *) (ps_mem_rec->pv_base); + } + + /* Init mem records mb info array for CABAC */ + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_CABAC_MB_INFO]; + { + ps_mb_map_ctxt_inc = (isvce_mb_info_ctxt_t *) (ps_mem_rec->pv_base); + } + + /* Note this memset can not be done in init() call, since init will called + during reset as well. And calling this during reset will mean all pointers + need to reinitialized */ + memset(ps_codec, 0, sizeof(isvce_codec_t)); + memset(ps_cabac, 0, sizeof(isvce_cabac_ctxt_t)); + + /* Set default Config Params */ + ps_cfg = &ps_codec->s_cfg; + isvce_set_default_params(ps_cfg); + + /* get new input dimensions that satisfy the SVC and libavc constraints + constraint 1) All layers of SVC should have dimensions that are a multiple of + 16 constraint 2) Dimension of Li layer = dimension of Li-1 layer * scaling + factor*/ + + isvce_get_svc_compliant_dimensions(ps_ip->s_svc_inp_params.u1_num_spatial_layers, + ps_ip->s_svc_inp_params.d_spatial_res_ratio, ps_ip->u4_wd, + ps_ip->u4_ht, &ps_cfg->u4_wd, &ps_cfg->u4_ht); + + /* Update config params as per input */ + ps_cfg->u4_max_wd = ps_cfg->u4_disp_wd = ALIGN16(ps_cfg->u4_wd); + ps_cfg->u4_max_ht = ps_cfg->u4_disp_ht = ALIGN16(ps_cfg->u4_ht); + ps_cfg->i4_wd_mbs = ps_cfg->u4_max_wd >> 4; + ps_cfg->i4_ht_mbs = ps_cfg->u4_max_ht >> 4; + ps_cfg->u4_max_ref_cnt = ps_ip->s_ive_ip.u4_max_ref_cnt; + ps_cfg->u4_max_reorder_cnt = 
ps_ip->s_ive_ip.u4_max_reorder_cnt; + ps_cfg->u4_max_level = ps_ip->s_ive_ip.u4_max_level; + ps_cfg->e_inp_color_fmt = ps_ip->s_ive_ip.e_inp_color_fmt; + ps_cfg->e_recon_color_fmt = ps_ip->s_ive_ip.e_recon_color_fmt; + ps_cfg->u4_max_framerate = ps_ip->s_ive_ip.u4_max_framerate; + for(i = 0; i < ps_ip->s_svc_inp_params.u1_num_spatial_layers; i++) + { + ps_cfg->au4_max_bitrate[i] = ps_ip->pu4_max_bitrate[i]; + } + ps_cfg->u4_num_bframes = ps_ip->s_ive_ip.u4_num_bframes; + ps_cfg->e_content_type = ps_ip->s_ive_ip.e_content_type; + ps_cfg->u4_max_srch_rng_x = ps_ip->s_ive_ip.u4_max_srch_rng_x; + ps_cfg->u4_max_srch_rng_y = ps_ip->s_ive_ip.u4_max_srch_rng_y; + ps_cfg->e_slice_mode = ps_ip->s_ive_ip.e_slice_mode; + ps_cfg->u4_slice_param = ps_ip->s_ive_ip.u4_slice_param; + ps_cfg->e_arch = ps_ip->s_ive_ip.e_arch; + ps_cfg->e_soc = ps_ip->s_ive_ip.e_soc; + ps_cfg->u4_enable_recon = ps_ip->s_ive_ip.u4_enable_recon; + ps_cfg->e_rc_mode = ps_ip->s_ive_ip.e_rc_mode; + ps_cfg->u4_disable_vui = ps_ip->b_use_default_vui; + + ps_cfg->s_svc_params.u1_num_temporal_layers = ps_ip->s_svc_inp_params.u1_num_temporal_layers; + + ps_cfg->s_svc_params.u1_num_spatial_layers = ps_ip->s_svc_inp_params.u1_num_spatial_layers; + + ps_cfg->s_svc_params.d_spatial_res_ratio = ps_ip->s_svc_inp_params.d_spatial_res_ratio; + + ps_cfg->b_nalu_info_export_enable = ps_ip->b_nalu_info_export_enable; + + /* frame dimensions */ + max_ht_luma = ALIGN16(ps_cfg->u4_ht); + max_wd_luma = ALIGN16(ps_cfg->u4_wd); + max_mb_rows = max_ht_luma / MB_SIZE; + max_mb_cols = max_wd_luma / MB_SIZE; + max_mb_cnt = max_mb_rows * max_mb_cols; + + /* Validate params */ + ps_op->s_ive_op.u4_error_code |= isvce_svc_inp_params_validate(ps_ip, ps_cfg); + + if(ps_op->s_ive_op.u4_error_code != IV_SUCCESS) + { + return IV_FAIL; + } + +#if defined(X86) + if((ps_cfg->e_arch != ARCH_X86_GENERIC) && (ps_cfg->e_arch != ARCH_X86_SSSE3) && + (ps_cfg->e_arch != ARCH_X86_SSE42)) + { + ps_cfg->e_arch = ARCH_X86_SSE42; + } +#else + 
if((ps_cfg->e_arch == ARCH_X86_GENERIC) || (ps_cfg->e_arch == ARCH_X86_SSSE3) || + (ps_cfg->e_arch == ARCH_X86_SSE42)) + { +#if defined(DISABLE_NEON) + ps_cfg->e_arch = ARCH_ARM_NONEON; +#elif defined(ARMV8) + ps_cfg->e_arch = ARCH_ARM_V8_NEON; +#else + ps_cfg->e_arch = ARCH_ARM_A7; +#endif + } +#endif + + if((ps_ip->s_ive_ip.u4_max_level < MIN_LEVEL) || (ps_ip->s_ive_ip.u4_max_level > MAX_LEVEL)) + { + ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED; + ps_cfg->u4_max_level = DEFAULT_MAX_LEVEL; + } + + if(ps_ip->s_ive_ip.u4_max_ref_cnt > MAX_REF_CNT) + { + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED; + ps_cfg->u4_max_ref_cnt = MAX_REF_CNT; + } + + if(ps_ip->s_ive_ip.u4_max_reorder_cnt > MAX_REF_CNT) + { + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED; + ps_cfg->u4_max_reorder_cnt = MAX_REF_CNT; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_BACKUP]; + { + ps_codec->ps_mem_rec_backup = (iv_mem_rec_t *) ps_mem_rec->pv_base; + + memcpy(ps_codec->ps_mem_rec_backup, ps_mem_rec_base, + ISVCE_MEM_REC_CNT * sizeof(iv_mem_rec_t)); + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_ENTROPY]; + { + /* temp var */ + WORD32 size = 0, offset; + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS) + { + /* base ptr */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* reset size */ + size = 0; + + /* skip mb run */ + ps_codec->as_process[i].s_entropy.pi4_mb_skip_run = (WORD32 *) (pu1_buf + size); + size += sizeof(WORD32); + size = ALIGN8(size); + + /* entropy map */ + ps_codec->as_process[i].s_entropy.pu1_entropy_map = + (UWORD8 *) (pu1_buf + size + max_mb_cols); + /* size in bytes to store entropy status of an entire frame */ + size += (max_mb_cols * max_mb_rows); + /* add an additional 1 row of bytes to evade the special case of row 0 + */ + size += max_mb_cols; + size = ALIGN128(size); + + /* bit stream ptr */ + ps_codec->as_process[i].s_entropy.ps_bitstrm = (bitstrm_t *) (pu1_buf + size); 
+ size += sizeof(ps_codec->as_process[i].s_entropy.ps_bitstrm); + size = ALIGN128(size); + +#if ENABLE_RE_ENC_AS_SKIP + /* bit stream ptr */ + ps_codec->as_process[i].s_entropy.ps_bitstrm_after_slice_hdr = + (bitstrm_t *) (pu1_buf + size); + size += sizeof(ps_codec->as_process[i].s_entropy.ps_bitstrm_after_slice_hdr); + size = ALIGN128(size); +#endif + + /* nnz luma */ + ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma = (UWORD8(*)[4])(pu1_buf + size); + size += (max_mb_cols * 4 * sizeof(UWORD8)); + size = ALIGN128(size); + + /* nnz chroma */ + ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr = (UWORD8(*)[4])(pu1_buf + size); + size += (max_mb_cols * 4 * sizeof(UWORD8)); + size = ALIGN128(size); + + /* ps_mb_qp_ctxt */ + ps_codec->as_process[i].s_entropy.ps_mb_qp_ctxt = (mb_qp_ctxt_t *) (pu1_buf + size); + size = ALIGN128(sizeof(ps_codec->as_process[i].s_entropy.ps_mb_qp_ctxt[0])); + + offset = size; + + /* cabac Context */ + ps_codec->as_process[i].s_entropy.ps_cabac = ps_cabac; + } + else + { + /* base ptr */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* reset size */ + size = offset; + + /* skip mb run */ + ps_codec->as_process[i].s_entropy.pi4_mb_skip_run = (WORD32 *) (pu1_buf + size); + size += sizeof(WORD32); + size = ALIGN8(size); + + /* entropy map */ + ps_codec->as_process[i].s_entropy.pu1_entropy_map = + (UWORD8 *) (pu1_buf + size + max_mb_cols); + /* size in bytes to store entropy status of an entire frame */ + size += (max_mb_cols * max_mb_rows); + /* add an additional 1 row of bytes to evade the special case of row 0 + */ + size += max_mb_cols; + size = ALIGN128(size); + + /* bit stream ptr */ + ps_codec->as_process[i].s_entropy.ps_bitstrm = (bitstrm_t *) (pu1_buf + size); + size += sizeof(ps_codec->as_process[i].s_entropy.ps_bitstrm); + size = ALIGN128(size); + +#if ENABLE_RE_ENC_AS_SKIP + /* bit stream ptr */ + ps_codec->as_process[i].s_entropy.ps_bitstrm_after_slice_hdr = + (bitstrm_t *) (pu1_buf + size); + size += 
sizeof(ps_codec->as_process[i].s_entropy.ps_bitstrm_after_slice_hdr); + size = ALIGN128(size); +#endif + + /* nnz luma */ + ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma = + (UWORD8(*)[4])(UWORD8(*)[4])(pu1_buf + size); + size += (max_mb_cols * 4 * sizeof(UWORD8)); + size = ALIGN128(size); + + /* nnz chroma */ + ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr = (UWORD8(*)[4])(pu1_buf + size); + size += (max_mb_cols * 4 * sizeof(UWORD8)); + size = ALIGN128(size); + + /* ps_mb_qp_ctxt */ + ps_codec->as_process[i].s_entropy.ps_mb_qp_ctxt = (mb_qp_ctxt_t *) (pu1_buf + size); + size = ALIGN128(sizeof(ps_codec->as_process[i].s_entropy.ps_mb_qp_ctxt[0])); + + /* cabac Context */ + ps_codec->as_process[i].s_entropy.ps_cabac = ps_cabac; + } + } + ps_codec->as_process[0].s_entropy.ps_cabac->ps_mb_map_ctxt_inc_base = ps_mb_map_ctxt_inc; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_MB_COEFF_DATA]; + { + /* temp var */ + WORD32 size = 0, size_of_row; + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* size of coeff data of 1 mb */ + size += sizeof(tu_sblk_coeff_data_t) * MAX_4x4_SUBBLKS; + + /* size of coeff data of 1 row of mb's */ + size *= max_mb_cols; + + /* align to avoid false sharing */ + size = ALIGN64(size); + size_of_row = size; + + /* size for one full frame */ + size *= max_mb_rows; + + ps_codec->u4_size_coeff_data = size_of_row; + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS) + { + ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf; + ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data = pu1_buf; + } + else + { + ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf + size; + ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data = pu1_buf + size; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_MB_HEADER_DATA]; + { + /* temp var */ + WORD32 size, size_of_row; + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* size of header data of 1 mb */ + size = sizeof(isvce_mb_hdr_t); + + /* size for 1 row of 
mbs */ + size = size * max_mb_cols; + + /* align to avoid any false sharing across threads */ + size = ALIGN64(size); + size_of_row = size; + + /* size for one full frame */ + size *= max_mb_rows; + + ps_codec->u4_size_header_data = size_of_row; + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS) + { + ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf; + ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data = pu1_buf; + } + else + { + ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf + size; + ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data = pu1_buf + size; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_MVBITS]; + { + /* max srch range x */ + UWORD32 u4_srch_range_x = ps_ip->s_ive_ip.u4_max_srch_rng_x; + + /* max srch range y */ + UWORD32 u4_srch_range_y = ps_ip->s_ive_ip.u4_max_srch_rng_y; + + /* max srch range */ + UWORD32 u4_max_srch_range = MAX(u4_srch_range_x, u4_srch_range_y); + + /* temp var */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* due to subpel */ + u4_max_srch_range <<= 2; + + // /* due to mv on either direction */ + // u4_max_srch_range = (u4_max_srch_range << 1); + + /* due to pred mv + zero */ + u4_max_srch_range = (u4_max_srch_range << 1) + 1; + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + /* me ctxt */ + isvce_me_ctxt_t *ps_mem_ctxt = &(ps_codec->as_process[i].s_me_ctxt); + + /* init at zero mv */ + ps_mem_ctxt->pu1_mv_bits = pu1_buf + u4_max_srch_range; + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SPS]; + { + ps_codec->ps_sps_base = (sps_t *) ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_PPS]; + { + ps_codec->ps_pps_base = (pps_t *) ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SVC_NALU_EXT]; + { + ps_codec->ps_svc_nalu_ext_base = ps_mem_rec->pv_base; + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS) + { + ps_codec->as_process[i].ps_svc_nalu_ext_base = 
ps_mem_rec->pv_base; + } + else + { + WORD32 size = SVC_MAX_SLICE_HDR_CNT * sizeof(slice_header_t); + void *pv_buf = (UWORD8 *) ps_mem_rec->pv_base + size; + + ps_codec->as_process[i].ps_svc_nalu_ext_base = pv_buf; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SUBSET_SPS]; + { + ps_codec->ps_subset_sps_base = ps_mem_rec->pv_base; + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + ps_codec->as_process[i].ps_subset_sps_base = ps_mem_rec->pv_base; + } + } + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SLICE_HDR]; + { + ps_codec->ps_slice_hdr_base = ps_mem_rec->pv_base; + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS) + { + ps_codec->as_process[i].ps_slice_hdr_base = ps_mem_rec->pv_base; + } + else + { + /* temp var */ + WORD32 size = SVC_MAX_SLICE_HDR_CNT * sizeof(slice_header_t); + void *pv_buf = (UWORD8 *) ps_mem_rec->pv_base + size; + + ps_codec->as_process[i].ps_slice_hdr_base = pv_buf; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SVC_SLICE_HDR]; + { + ps_codec->ps_svc_slice_hdr_base = ps_mem_rec->pv_base; + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + ps_codec->as_process[i].ps_svc_slice_hdr_base = ps_mem_rec->pv_base; + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_AIR_MAP]; + { + /* temp var */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS) + { + ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf; + } + else + { + ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf + max_mb_cnt; + } + } + + ps_codec->pu2_intr_rfrsh_map = (UWORD16 *) (pu1_buf + max_mb_cnt * MAX_CTXT_SETS); + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SLICE_MAP]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf_ping, *pu1_buf_pong; + + /* init pointer */ + pu1_buf_ping = ps_mem_rec->pv_base; + pu1_buf_pong = pu1_buf_ping + ALIGN64(max_mb_cnt); + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + if(i < MAX_PROCESS_CTXT / 
MAX_CTXT_SETS) + { + ps_codec->as_process[i].pu1_slice_idx = pu1_buf_ping; + } + else + { + ps_codec->as_process[i].pu1_slice_idx = pu1_buf_pong; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_THREAD_HANDLE]; + { + WORD32 handle_size = ithread_get_handle_size(); + + for(i = 0; i < MAX_PROCESS_THREADS; i++) + { + ps_codec->apv_proc_thread_handle[i] = + (UWORD8 *) ps_mem_rec->pv_base + (i * handle_size); + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_CTL_MUTEX]; + { + ps_codec->pv_ctl_mutex = ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_ENTROPY_MUTEX]; + { + ps_codec->pv_entropy_mutex = ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_PROC_JOBQ]; + { + ps_codec->pv_proc_jobq_buf = ps_mem_rec->pv_base; + ps_codec->i4_proc_jobq_buf_size = ps_mem_rec->u4_mem_size; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_ENTROPY_JOBQ]; + { + ps_codec->pv_entropy_jobq_buf = ps_mem_rec->pv_base; + ps_codec->i4_entropy_jobq_buf_size = ps_mem_rec->u4_mem_size; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_PROC_MAP]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS) + { + ps_codec->as_process[i].pu1_proc_map = pu1_buf + max_mb_cols; + } + else + { + ps_codec->as_process[i].pu1_proc_map = pu1_buf + total_size + max_mb_cols; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_DBLK_MAP]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + 
+ /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + /*Align the memory offsets*/ + total_size = ALIGN64(total_size); + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS) + { + ps_codec->as_process[i].pu1_deblk_map = pu1_buf + max_mb_cols; + } + else + { + ps_codec->as_process[i].pu1_deblk_map = pu1_buf + total_size + max_mb_cols; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_ME_MAP]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base; + + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS) + { + ps_codec->as_process[i].pu1_me_map = pu1_buf + max_mb_cols; + } + else + { + ps_codec->as_process[i].pu1_me_map = pu1_buf + total_size + max_mb_cols; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_DPB_MGR]; + { + ps_codec->pv_dpb_mgr = ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_PROC_SCRATCH]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base; + + /* size of pred buffer, fwd transform output, temp buffer for inv tra */ + WORD32 size_pred_luma, size_pred_chroma, size_fwd, size_inv, size_hp; + + /* temp var */ + WORD32 size = 0; + + /* size to hold intra/inter prediction buffer */ + size_pred_luma = sizeof(UWORD8) * 16 * 16; + size_pred_chroma = sizeof(UWORD8) * 8 * 16; + + /* size to hold fwd transform output */ + size_fwd = sizeof(WORD16) * SIZE_TRANS_BUFF; + + /* size to hold temporary data during inverse transform */ + size_inv = sizeof(WORD32) * SIZE_TMP_BUFF_ITRANS; + + /* size to hold half pel plane buffers */ + size_hp = sizeof(UWORD8) * (HP_BUFF_WD * 
HP_BUFF_HT); + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + /* prediction buffer */ + ps_codec->as_process[i].pu1_pred_mb = (void *) (pu1_buf + size); + ps_codec->as_process[i].i4_pred_strd = 16; + size += size_pred_luma; + size = ALIGN64(size); + + /* prediction buffer */ + ps_codec->as_process[i].pu1_ref_mb_intra_4x4 = (void *) (pu1_buf + size); + size += size_pred_luma; + size = ALIGN64(size); + + /* prediction buffer intra 16x16 */ + ps_codec->as_process[i].pu1_pred_mb_intra_16x16 = (void *) (pu1_buf + size); + size += size_pred_luma; + size = ALIGN64(size); + + /* prediction buffer intra 16x16 plane*/ + ps_codec->as_process[i].pu1_pred_mb_intra_16x16_plane = (void *) (pu1_buf + size); + size += size_pred_luma; + size = ALIGN64(size); + + /* prediction buffer intra chroma*/ + ps_codec->as_process[i].pu1_pred_mb_intra_chroma = (void *) (pu1_buf + size); + size += size_pred_chroma; + size = ALIGN64(size); + + /* prediction buffer intra chroma plane*/ + ps_codec->as_process[i].pu1_pred_mb_intra_chroma_plane = (void *) (pu1_buf + size); + size += size_pred_chroma; + size = ALIGN64(size); + + /* Fwd transform output */ + ps_codec->as_process[i].pi2_res_buf = (void *) (pu1_buf + size); + ps_codec->as_process[i].i4_res_strd = 16; + size += size_fwd; + size = ALIGN64(size); + + /* Fwd transform output */ + ps_codec->as_process[i].pi2_res_buf_intra_4x4 = (void *) (pu1_buf + size); + size += size_fwd; + size = ALIGN64(size); + + /* scratch buffer used during inverse transform */ + ps_codec->as_process[i].pv_scratch_buff = (void *) (pu1_buf + size); + size += size_inv; + size = ALIGN64(size); + + for(j = 0; j < SUBPEL_BUFF_CNT; j++) + { + ps_codec->as_process[i].apu1_subpel_buffs[j] = (pu1_buf + size); + size += ALIGN64(size_hp); + } + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_QUANT_PARAM]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base; + + /* size of qp, threshold matrix, fwd scaling list for one plane */ + WORD32 
size_quant_param, size_thres_mat, size_fwd_weight_mat, size_satqd_weight_mat; + + /* temp var */ + WORD32 total_size = 0; + + /* size of quantization parameter list of 1 plane */ + size_quant_param = ALIGN64(sizeof(quant_params_t)); + + /* size of threshold matrix for quantization + * (assuming the transform_8x8_flag is disabled). + * for 1 plane */ + size_thres_mat = ALIGN64(sizeof(WORD16) * 4 * 4); + + /* size of forward weight matrix for quantization + * (assuming the transform_8x8_flag is disabled). + * for 1 plane */ + size_fwd_weight_mat = ALIGN64(sizeof(WORD16) * 4 * 4); + + /* size of SATQD matrix*/ + size_satqd_weight_mat = ALIGN64(sizeof(UWORD16) * 9); + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + quant_params_t **ps_qp_params = ps_codec->as_process[i].ps_qp_params; + + /* quantization param structure */ + ps_qp_params[0] = (quant_params_t *) (pu1_buf + total_size); + total_size = total_size + size_quant_param; + ps_qp_params[1] = (quant_params_t *) (pu1_buf + total_size); + total_size = total_size + size_quant_param; + ps_qp_params[2] = (quant_params_t *) (pu1_buf + total_size); + total_size = total_size + size_quant_param; + + /* threshold matrix for quantization */ + ps_qp_params[0]->pu2_thres_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_thres_mat; + ps_qp_params[1]->pu2_thres_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_thres_mat; + ps_qp_params[2]->pu2_thres_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_thres_mat; + + /* fwd weight matrix */ + ps_qp_params[0]->pu2_weigh_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_fwd_weight_mat; + ps_qp_params[1]->pu2_weigh_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_fwd_weight_mat; + ps_qp_params[2]->pu2_weigh_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_fwd_weight_mat; + + /* threshold matrix for SATQD */ + ps_qp_params[0]->pu2_sad_thrsh = (void *) 
(pu1_buf + total_size); + total_size = total_size + size_satqd_weight_mat; + ps_qp_params[1]->pu2_sad_thrsh = (void *) (pu1_buf + total_size); + total_size = total_size + size_satqd_weight_mat; + ps_qp_params[2]->pu2_sad_thrsh = (void *) (pu1_buf + total_size); + total_size = total_size + size_satqd_weight_mat; + + total_size = ALIGN128(total_size); + } + } + + isvce_svc_nbr_info_buf_init(ps_codec, &ps_mem_rec_base[ISVCE_MEM_REC_TOP_ROW_SYN_INFO]); + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_BS_QP]; + { + UWORD8 *pu1_buf_ping; + + /* size in bytes to store vertical edge bs, horizontal edge bs and qp of + * every mb*/ + WORD32 vert_bs_size, horz_bs_size, qp_size; + + /* vertical edge bs = total number of vertical edges * number of bytes per + * each edge */ + /* total num of v edges = total mb * 4 (assuming transform_8x8_flag = 0), + * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing + * bs */ + vert_bs_size = ALIGN64(max_mb_cnt * 4 * 4); + + /* horizontal edge bs = total number of horizontal edges * number of bytes + * per each edge */ + /* total num of h edges = total mb * 4 (assuming transform_8x8_flag = 0), + * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing + * bs */ + horz_bs_size = ALIGN64(max_mb_cnt * 4 * 4); + + /* qp of each mb requires 1 byte */ + qp_size = ALIGN64(max_mb_cnt); + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + pu1_buf_ping = (UWORD8 *) ps_mem_rec->pv_base; + + /* vertical edge bs storage space */ + ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_vert_bs = + (UWORD32 *) pu1_buf_ping; + pu1_buf_ping += vert_bs_size; + + ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_intra_base_vert_bs = + (UWORD32 *) pu1_buf_ping; + pu1_buf_ping += vert_bs_size; + + /* horizontal edge bs storage space */ + ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_horz_bs = + (UWORD32 *) pu1_buf_ping; + pu1_buf_ping += horz_bs_size; + + 
ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_intra_base_horz_bs = + (UWORD32 *) pu1_buf_ping; + pu1_buf_ping += horz_bs_size; + + /* qp */ + ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp = (UWORD8 *) pu1_buf_ping; + pu1_buf_ping += qp_size; + } + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_INP_PIC]; + { + ps_codec->pv_inp_buf_mgr_base = ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_OUT]; + { + ps_codec->pv_out_buf_mgr_base = ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_CSC]; + { + ps_codec->pu1_y_csc_buf_base = ps_mem_rec->pv_base; + ps_codec->pu1_uv_csc_buf_base = + (UWORD8 *) ps_mem_rec->pv_base + (max_ht_luma * max_wd_luma); + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_REF_PIC]; + { + /* size of buf mgr struct */ + WORD32 size = ih264_buf_mgr_size(); + + /* temp var */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* pic buffer mgr */ + ps_codec->pv_ref_buf_mgr_base = pu1_buf; + + /* picture bank */ + ps_codec->ps_pic_buf_base = (svc_au_buf_t *) (pu1_buf + size); + ps_codec->i4_total_pic_buf_size = ps_mem_rec->u4_mem_size - size; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_MVBANK]; + { + /* size of buf mgr struct */ + WORD32 size = ih264_buf_mgr_size(); + + /* temp var */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* mv buffer mgr */ + ps_codec->pv_svc_au_data_store_mgr_base = pu1_buf; + + /* mv bank */ + ps_codec->ps_svc_au_data_base = (svc_au_data_t *) (pu1_buf + size); + ps_codec->i4_svc_au_data_size = ps_mem_rec->u4_mem_size - size; + } + + ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_MB_INFO_NMB]; + { + /* temp var */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* size of nmb ctxt */ + WORD32 size = max_mb_cols * sizeof(isvce_mb_info_nmb_t); + + WORD32 nmb_cntr, subpel_buf_size; + + /* init nmb info structure pointer in all proc ctxts */ + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + ps_codec->as_process[i].ps_nmb_info = (isvce_mb_info_nmb_t *) (pu1_buf); + + 
pu1_buf += size; + } + + /* Additional 4 bytes to allow use of '_mm_loadl_epi64' */ + subpel_buf_size = (MB_SIZE * MB_SIZE + 4) * sizeof(UWORD8); + + /* adjusting pointers for nmb halfpel buffer */ + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + isvce_mb_info_nmb_t *ps_mb_info_nmb = &ps_codec->as_process[i].ps_nmb_info[0]; + + for(nmb_cntr = 0; nmb_cntr < max_mb_cols; nmb_cntr++) + { + ps_mb_info_nmb[nmb_cntr].pu1_best_sub_pel_buf = pu1_buf; + + pu1_buf = pu1_buf + subpel_buf_size; + + ps_mb_info_nmb[nmb_cntr].u4_bst_spel_buf_strd = MB_SIZE; + } + } + } + + isvce_svc_inp_buf_init(ps_codec, &ps_mem_rec_base[ISVCE_MEM_SVC_SPAT_INP]); + + isvce_initialize_downscaler(&ps_codec->s_scaler, &ps_mem_rec_base[ISVCE_MEM_DOWN_SCALER], + ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio, + ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers, + ps_codec->s_cfg.u4_wd, ps_codec->s_cfg.u4_ht, + ps_codec->s_cfg.e_arch); + + isvce_svc_ilp_buf_init(ps_codec, &ps_mem_rec_base[ISVCE_MEM_SVC_ILP_DATA]); + + isvce_ilp_mv_ctxt_init(ps_codec, &ps_mem_rec_base[ISVCE_MEM_SVC_ILP_MV_CTXT]); + + isvce_svc_res_pred_ctxt_init(ps_codec, &ps_mem_rec_base[ISVCE_MEM_SVC_RES_PRED_CTXT]); + + isvce_intra_pred_ctxt_init(ps_codec, &ps_mem_rec_base[ISVCE_MEM_SVC_INTRA_PRED_CTXT]); + + isvce_rc_utils_init(&ps_codec->s_rate_control.s_rc_utils, + &ps_mem_rec_base[ISVCE_MEM_SVC_RC_UTILS_CTXT], ps_codec->s_cfg.e_arch); + +#if ENABLE_MODE_STAT_VISUALISER + isvce_msv_ctxt_init(ps_codec, &ps_mem_rec_base[MEM_MODE_STAT_VISUALISER_BUF]); +#endif + + isvce_get_rate_control_mem_tab(&ps_codec->s_rate_control, &ps_mem_rec_base[ISVCE_MEM_REC_RC], + USE_BASE); + + isvce_sub_pic_rc_ctxt_init(ps_codec, &ps_mem_rec_base[ISVCE_MEM_SVC_SUB_PIC_RC_CTXT]); + + status = isvce_init(ps_codec); + + return status; +} + +/** +******************************************************************************* +* +* @brief +* Retrieves mem records passed to the codec +* +* @par Description: +* Retrieves mem recs passed during init +* +* 
@param[in] ps_codec_obj
+*  Pointer to codec object at API level; its pv_codec_handle is read as the
+*  encoder context (isvce_codec_t)
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (isvce_retrieve_mem_rec_ip_t); the
+*  array at s_ive_ip.ps_mem_rec is overwritten with the backed-up records
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (isvce_retrieve_mem_rec_op_t);
+*  receives the number of records filled and the error code
+*
+* @returns IV_FAIL when i4_init_done is not 1 (the IVE_FATALERROR bit and
+*  IH264E_INIT_NOT_DONE are OR-ed into u4_error_code), IV_SUCCESS otherwise
+*
+* @remarks Apart from copying ISVCE_MEM_REC_CNT records, this call also joins
+*  the threads and frees the job queues, mutexes, buffer managers and the
+*  sub-pic RC context referenced by the codec
+*
+*******************************************************************************
+*/
+static WORD32 isvce_retrieve_memrec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
+{
+    isvce_codec_t *ps_codec = (isvce_codec_t *) ps_codec_obj->pv_codec_handle;
+
+    /* ctrl call I/O structures, viewed through their concrete types */
+    isvce_retrieve_mem_rec_ip_t *ps_ip = pv_api_ip;
+    isvce_retrieve_mem_rec_op_t *ps_op = pv_api_op;
+
+    if(ps_codec->i4_init_done != 1)
+    {
+        ps_op->s_ive_op.u4_error_code |= 1 << IVE_FATALERROR;
+        ps_op->s_ive_op.u4_error_code |= IH264E_INIT_NOT_DONE;
+        return IV_FAIL;
+    }
+
+    /* join the encoder threads (isvce_join_threads) before anything they may
+     * still be using is released below */
+    isvce_join_threads(ps_codec);
+
+    /* hand back the memory records that were backed up in ps_mem_rec_backup */
+    memcpy(ps_ip->s_ive_ip.ps_mem_rec, ps_codec->ps_mem_rec_backup,
+           ISVCE_MEM_REC_CNT * (sizeof(iv_mem_rec_t)));
+    ps_op->s_ive_op.u4_num_mem_rec_filled = ISVCE_MEM_REC_CNT;
+
+    /* free the entropy and processing job queues, destroy the control and
+     * entropy mutexes, then free the four buffer managers (AU data store,
+     * reference, input, output) */
+    ih264_list_free(ps_codec->pv_entropy_jobq);
+    ih264_list_free(ps_codec->pv_proc_jobq);
+    ithread_mutex_destroy(ps_codec->pv_ctl_mutex);
+    ithread_mutex_destroy(ps_codec->pv_entropy_mutex);
+
+    ih264_buf_mgr_free((buf_mgr_t *) ps_codec->pv_svc_au_data_store_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *) ps_codec->pv_ref_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *) ps_codec->pv_inp_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *) ps_codec->pv_out_buf_mgr);
+
+#if ENABLE_MODE_STAT_VISUALISER
+    isvce_msv_ctxt_delete(ps_codec->ps_mode_stat_visualiser);
+#endif
+
+    isvce_sub_pic_rc_ctxt_delete(ps_codec->as_process->ps_sub_pic_rc_ctxt);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets the encoder in flush mode.
+* +* @par Description: +* Sets the encoder in flush mode +* +* @param[in] ps_codec_obj +* Pointer to codec object at API level +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns error status +* +* @remarks This call has no real effect on encoder +* +******************************************************************************* +*/ +static WORD32 isvce_set_flush_mode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) +{ + /* codec ctxt */ + isvce_codec_t *ps_codec = (isvce_codec_t *) ps_codec_obj->pv_codec_handle; + + /* ctrl call I/O structures */ + isvce_ctl_flush_op_t *ps_ctl_op = pv_api_op; + + UNUSED(pv_api_ip); + + ps_ctl_op->s_ive_op.u4_error_code = 0; + + /* signal flush frame control call */ + ps_codec->i4_flush_mode = 1; + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Gets encoder buffer requirements +* +* @par Description: +* Gets the encoder buffer requirements. Basing on max width and max height +* configuration settings, this routine, computes the sizes of necessary input, +* output buffers returns this info to callee. 
+* +* @param[in] ps_codec_obj +* Pointer to codec object at API level +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static WORD32 isvce_get_buf_info(void *pv_codec_handle, void *pv_api_ip, void *pv_api_op) +{ + WORD32 i; + UWORD32 wd, ht; + + isvce_codec_t *ps_codec = (isvce_codec_t *) pv_codec_handle; + isvce_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip; + isvce_ctl_getbufinfo_op_t *ps_op = pv_api_op; + + isvce_get_svc_compliant_dimensions(ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers, + ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio, + ALIGN16(ps_ip->s_ive_ip.u4_max_wd), + ALIGN16(ps_ip->s_ive_ip.u4_max_ht), &wd, &ht); + + ps_op->s_ive_op.u4_error_code = 0; + + /* Number of components in input buffers required for codec & + * Minimum sizes of each component in input buffer required */ + if(ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420P) + { + ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_420_COMP; + + ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht; + ps_op->s_ive_op.au4_min_in_buf_size[1] = (wd >> 1) * (ht >> 1); + ps_op->s_ive_op.au4_min_in_buf_size[2] = (wd >> 1) * (ht >> 1); + } + else if(ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_422ILE) + { + ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_422ILE_COMP; + + ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 2; + ps_op->s_ive_op.au4_min_in_buf_size[1] = ps_op->s_ive_op.au4_min_in_buf_size[2] = 0; + } + else if(ps_ip->s_ive_ip.e_inp_color_fmt == IV_RGB_565) + { + ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_RGB565_COMP; + + ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 2; + ps_op->s_ive_op.au4_min_in_buf_size[1] = ps_op->s_ive_op.au4_min_in_buf_size[2] = 0; + } + else if(ps_ip->s_ive_ip.e_inp_color_fmt == IV_RGBA_8888) + { + ps_op->s_ive_op.u4_inp_comp_cnt = 
MIN_RAW_BUFS_RGBA8888_COMP; + + ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 4; + ps_op->s_ive_op.au4_min_in_buf_size[1] = ps_op->s_ive_op.au4_min_in_buf_size[2] = 0; + } + else if((ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420SP_UV) || + (ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420SP_VU)) + { + ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_420SP_COMP; + + ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht; + ps_op->s_ive_op.au4_min_in_buf_size[1] = wd * (ht >> 1); + ps_op->s_ive_op.au4_min_in_buf_size[2] = 0; + } + + /* Number of components in output buffers required for codec & + * Minimum sizes of each component in output buffer required */ + ps_op->s_ive_op.u4_out_comp_cnt = MIN_BITS_BUFS_COMP; + + for(i = 0; i < (WORD32) ps_op->s_ive_op.u4_out_comp_cnt; i++) + { + ps_op->s_ive_op.au4_min_out_buf_size[i] = + MAX(((wd * ht * 3) >> 1) * ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers, + MIN_STREAM_SIZE); + } + + ps_op->u4_rec_comp_cnt = MIN_RAW_BUFS_420_COMP; + ps_op->au4_min_rec_buf_size[0] = wd * ht; + ps_op->au4_min_rec_buf_size[1] = (wd >> 1) * (ht >> 1); + ps_op->au4_min_rec_buf_size[2] = (wd >> 1) * (ht >> 1); + + if(ps_codec->s_cfg.b_nalu_info_export_enable) + { + ps_op->u4_min_nalu_info_buf_size = + isvce_get_nalu_info_buf_size(ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers); + } + else + { + ps_op->u4_min_nalu_info_buf_size = 0; + } + + ps_op->s_ive_op.u4_min_inp_bufs = MIN_INP_BUFS; + ps_op->s_ive_op.u4_min_out_bufs = MIN_OUT_BUFS; + ps_op->u4_min_rec_bufs = MIN_OUT_BUFS; + ps_op->u4_min_nalu_info_bufs = MIN_OUT_BUFS; + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Sets the picture dimensions +* +* @par Description: +* Sets width, height, display width, display height and strides +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @param[out] ps_cfg +* Pointer to config 
structure to be updated +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static IV_STATUS_T isvce_set_dimensions(void *pv_api_ip, void *pv_api_op, + isvce_cfg_params_t *ps_cfg) +{ + isvce_ctl_set_dimensions_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_dimensions_op_t *ps_op = pv_api_op; + + ps_op->s_ive_op.u4_error_code = 0; + + isvce_get_svc_compliant_dimensions( + ps_cfg->s_svc_params.u1_num_spatial_layers, ps_cfg->s_svc_params.d_spatial_res_ratio, + ps_ip->s_ive_ip.u4_wd, ps_ip->s_ive_ip.u4_ht, &ps_cfg->u4_wd, &ps_cfg->u4_ht); + + ASSERT(0 == (ps_cfg->u4_wd % MB_SIZE)); + ASSERT(0 == (ps_cfg->u4_ht % MB_SIZE)); + + ps_cfg->i4_wd_mbs = ps_cfg->u4_wd / MB_SIZE; + ps_cfg->i4_ht_mbs = ps_cfg->u4_ht / MB_SIZE; + ps_cfg->u4_disp_wd = ps_cfg->u4_wd; + ps_cfg->u4_disp_ht = ps_cfg->u4_ht; + + ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high; + ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low; + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Provide dimensions used for encoding +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @param[out] ps_cfg +* Pointer to config structure +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static IV_STATUS_T isvce_get_enc_frame_dimensions(isvce_ctl_get_enc_dimensions_ip_t *ps_ip, + isvce_ctl_get_enc_dimensions_op_t *ps_op, + isvce_cfg_params_t *ps_cfg) +{ + ps_op->u4_error_code = IVE_ERR_NONE; + + isvce_get_svc_compliant_dimensions(ps_cfg->s_svc_params.u1_num_spatial_layers, + ps_cfg->s_svc_params.d_spatial_res_ratio, + ps_ip->u4_inp_frame_wd, ps_ip->u4_inp_frame_ht, + &ps_op->u4_enc_frame_wd, &ps_op->u4_enc_frame_ht); + + ASSERT(ps_cfg->u4_wd == ps_op->u4_enc_frame_wd); + 
ASSERT(ps_cfg->u4_ht == ps_op->u4_enc_frame_ht);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets source and target frame rates
+*
+* @par Description:
+*  Copies the source and target frame rates, together with the time stamp of
+*  the control call, from the input structure into the config
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks The values are stored as given; no range check is done here
+*
+*******************************************************************************
+*/
+static IV_STATUS_T isvce_set_frame_rate(void *pv_api_ip, void *pv_api_op,
+                                        isvce_cfg_params_t *ps_cfg)
+{
+    isvce_ctl_set_frame_rate_op_t *ps_rate_op = (isvce_ctl_set_frame_rate_op_t *) pv_api_op;
+    isvce_ctl_set_frame_rate_ip_t *ps_rate_ip = (isvce_ctl_set_frame_rate_ip_t *) pv_api_ip;
+
+    /* time stamp carried by this control call */
+    ps_cfg->u4_timestamp_low = ps_rate_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_rate_ip->s_ive_ip.u4_timestamp_high;
+
+    /* frame rates, taken over unchanged */
+    ps_cfg->u4_tgt_frame_rate = ps_rate_ip->s_ive_ip.u4_tgt_frame_rate;
+    ps_cfg->u4_src_frame_rate = ps_rate_ip->s_ive_ip.u4_src_frame_rate;
+
+    ps_rate_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets target bit rate
+*
+* @par Description:
+*  Copies one target bit rate per spatial layer, together with the time stamp
+*  of the control call, from the input structure into the config
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks pu4_target_bitrate is read for u1_num_spatial_layers entries
+*
+*******************************************************************************
+*/
+static IV_STATUS_T isvce_set_bit_rate(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* ctrl call I/O structures */
+    isvce_ctl_set_bitrate_ip_t *ps_ip = pv_api_ip;
+    isvce_ctl_set_bitrate_op_t *ps_op = pv_api_op;
+    WORD8 i;
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    for(i =
0; i < ps_cfg->s_svc_params.u1_num_spatial_layers; i++) + { + ps_cfg->au4_target_bitrate[i] = ps_ip->pu4_target_bitrate[i]; + } + + ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high; + ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low; + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Sets frame type +* +* @par Description: +* Sets frame type +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @param[out] ps_cfg +* Pointer to config structure to be updated +* +* @returns error status +* +* @remarks not a sticky tag +* +******************************************************************************* +*/ +static IV_STATUS_T isvce_set_frame_type(void *pv_api_ip, void *pv_api_op, + isvce_cfg_params_t *ps_cfg) +{ + /* ctrl call I/O structures */ + isvce_ctl_set_frame_type_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_frame_type_op_t *ps_op = pv_api_op; + + ps_op->s_ive_op.u4_error_code = 0; + + ps_cfg->e_frame_type = ps_ip->s_ive_ip.e_frame_type; + + ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high; + ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low; + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Sets quantization params +* +* @par Description: +* Sets the max, min and default qp for I frame, P frame and B frame +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @param[out] ps_cfg +* Pointer to config structure to be updated +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static IV_STATUS_T isvce_set_qp(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg) +{ + /* ctrl call I/O structures */ + 
isvce_ctl_set_qp_ip_t *ps_set_qp_ip = pv_api_ip; + isvce_ctl_set_qp_op_t *ps_set_qp_op = pv_api_op; + WORD8 i; + + ps_set_qp_op->s_ive_op.u4_error_code = 0; + + for(i = 0; i < ps_cfg->s_svc_params.u1_num_spatial_layers; i++) + { + ps_cfg->au4_i_qp_max[i] = + CLIP3(MIN_H264_QP, MAX_H264_QP, (WORD32) ps_set_qp_ip->pu4_i_qp_max[i]); + ps_cfg->au4_i_qp_min[i] = + CLIP3(MIN_H264_QP, MAX_H264_QP, (WORD32) ps_set_qp_ip->pu4_i_qp_min[i]); + ps_cfg->au4_i_qp[i] = CLIP3(ps_set_qp_ip->pu4_i_qp_min[i], ps_set_qp_ip->pu4_i_qp_max[i], + ps_set_qp_ip->pu4_i_qp[i]); + ps_cfg->au4_i_qp_max[i] = + CLIP3(MIN_H264_QP, MAX_H264_QP, (WORD32) ps_set_qp_ip->pu4_i_qp_max[i]); + ps_cfg->au4_i_qp_min[i] = + CLIP3(MIN_H264_QP, MAX_H264_QP, (WORD32) ps_set_qp_ip->pu4_i_qp_min[i]); + ps_cfg->au4_i_qp[i] = CLIP3(ps_set_qp_ip->pu4_i_qp_min[i], ps_set_qp_ip->pu4_i_qp_max[i], + ps_set_qp_ip->pu4_i_qp[i]); + ps_cfg->au4_i_qp_max[i] = + CLIP3(MIN_H264_QP, MAX_H264_QP, (WORD32) ps_set_qp_ip->pu4_i_qp_max[i]); + ps_cfg->au4_i_qp_min[i] = + CLIP3(MIN_H264_QP, MAX_H264_QP, (WORD32) ps_set_qp_ip->pu4_i_qp_min[i]); + ps_cfg->au4_i_qp[i] = CLIP3(ps_set_qp_ip->pu4_i_qp_min[i], ps_set_qp_ip->pu4_i_qp_max[i], + ps_set_qp_ip->pu4_i_qp[i]); + } + + ps_cfg->u4_timestamp_high = ps_set_qp_ip->s_ive_ip.u4_timestamp_high; + ps_cfg->u4_timestamp_low = ps_set_qp_ip->s_ive_ip.u4_timestamp_low; + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Sets encoding mode +* +* @par Description: +* Sets encoding mode +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @param[out] ps_cfg +* Pointer to config structure to be updated +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static IV_STATUS_T isvce_set_enc_mode(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg) +{ 
+ /* ctrl call I/O structures */ + isvce_ctl_set_enc_mode_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_enc_mode_op_t *ps_op = pv_api_op; + + ps_op->s_ive_op.u4_error_code = 0; + + ps_cfg->e_enc_mode = ps_ip->s_ive_ip.e_enc_mode; + + ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high; + ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low; + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Sets vbv parameters +* +* @par Description: +* Sets vbv parameters +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @param[out] ps_cfg +* Pointer to config structure to be updated +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static IV_STATUS_T isvce_set_vbv_params(void *pv_api_ip, void *pv_api_op, + isvce_cfg_params_t *ps_cfg) +{ + /* ctrl call I/O structures */ + isvce_ctl_set_vbv_params_ip_t *ps_ip = pv_api_ip; + isvce_ctl_set_vbv_params_op_t *ps_op = pv_api_op; + WORD8 i; + + ps_op->s_ive_op.u4_error_code = 0; + + for(i = 0; i < ps_cfg->s_svc_params.u1_num_spatial_layers; i++) + { + ps_cfg->au4_vbv_buffer_delay[i] = ps_ip->pu4_vbv_buffer_delay[i]; + } + + ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high; + ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low; + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Sets AIR parameters +* +* @par Description: +* Sets AIR parameters +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @param[out] ps_cfg +* Pointer to config structure to be updated +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ 
+static IV_STATUS_T isvc_set_air_params(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* typed views of the opaque control call arguments */
+    isvce_ctl_set_air_params_op_t *ps_air_op = pv_api_op;
+    isvce_ctl_set_air_params_ip_t *ps_air_ip = pv_api_ip;
+
+    /* nothing to report */
+    ps_air_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_air_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_air_ip->s_ive_ip.u4_timestamp_high;
+
+    /* AIR mode and refresh period */
+    ps_cfg->u4_air_refresh_period = ps_air_ip->s_ive_ip.u4_air_refresh_period;
+    ps_cfg->e_air_mode = ps_air_ip->s_ive_ip.e_air_mode;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets motion estimation parameters
+*
+* @par Description:
+*  Copies the half pel, quarter pel, fast sad and alt ref enables, the search
+*  range in x and y, the me speed preset and the time stamp of the request
+*  from the input structure to the config
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T isvc_set_me_params(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* typed views of the opaque control call arguments */
+    isvce_ctl_set_me_params_op_t *ps_me_op = pv_api_op;
+    isvce_ctl_set_me_params_ip_t *ps_me_ip = pv_api_ip;
+
+    /* nothing to report */
+    ps_me_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_me_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_me_ip->s_ive_ip.u4_timestamp_high;
+
+    /* search range and speed preset */
+    ps_cfg->u4_srch_rng_x = ps_me_ip->s_ive_ip.u4_srch_rng_x;
+    ps_cfg->u4_srch_rng_y = ps_me_ip->s_ive_ip.u4_srch_rng_y;
+    ps_cfg->u4_me_speed_preset = ps_me_ip->s_ive_ip.u4_me_speed_preset;
+
+    /* tool enables */
+    ps_cfg->u4_enable_hpel = ps_me_ip->s_ive_ip.u4_enable_hpel;
+    ps_cfg->u4_enable_qpel = ps_me_ip->s_ive_ip.u4_enable_qpel;
+    ps_cfg->u4_enable_fast_sad = ps_me_ip->s_ive_ip.u4_enable_fast_sad;
+    ps_cfg->u4_enable_alt_ref = ps_me_ip->s_ive_ip.u4_enable_alt_ref;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets Intra/Inter Prediction estimation parameters
+*
+* @par Description:
+*  Copies the intra 4x4 enable, the encoder speed preset and the time stamp
+*  of the request from the input structure to the config
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T isvc_set_ipe_params(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* typed views of the opaque control call arguments */
+    isvce_ctl_set_ipe_params_op_t *ps_ipe_op = pv_api_op;
+    isvce_ctl_set_ipe_params_ip_t *ps_ipe_ip = pv_api_ip;
+
+    /* nothing to report */
+    ps_ipe_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_ipe_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_ipe_ip->s_ive_ip.u4_timestamp_high;
+
+    /* prediction settings */
+    ps_cfg->u4_enc_speed_preset = ps_ipe_ip->s_ive_ip.u4_enc_speed_preset;
+    ps_cfg->u4_enable_intra_4x4 = ps_ipe_ip->s_ive_ip.u4_enable_intra_4x4;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets GOP parameters
+*
+* @par Description:
+*  Copies the I frame interval, the IDR frame interval and the time stamp of
+*  the request from the input structure to the config
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T isvc_set_gop_params(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* typed views of the opaque control call arguments */
+    isvce_ctl_set_gop_params_op_t *ps_gop_op = pv_api_op;
+    isvce_ctl_set_gop_params_ip_t *ps_gop_ip = pv_api_ip;
+
+    /* nothing to report */
+    ps_gop_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_gop_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_gop_ip->s_ive_ip.u4_timestamp_high;
+
+    /* frame intervals */
+    ps_cfg->u4_idr_frm_interval = ps_gop_ip->s_ive_ip.u4_idr_frm_interval;
+    ps_cfg->u4_i_frm_interval = ps_gop_ip->s_ive_ip.u4_i_frm_interval;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets profile parameters
+*
+* @par Description:
+*  Copies the profile, the entropy coding mode and the time stamp of the
+*  request from the input structure to the config
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T isvc_set_profile_params(void *pv_api_ip, void *pv_api_op,
+                                           isvce_cfg_params_t *ps_cfg)
+{
+    /* typed views of the opaque control call arguments */
+    isvce_ctl_set_profile_params_op_t *ps_profile_op = pv_api_op;
+    isvce_ctl_set_profile_params_ip_t *ps_profile_ip = pv_api_ip;
+
+    /* nothing to report */
+    ps_profile_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_profile_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_profile_ip->s_ive_ip.u4_timestamp_high;
+
+    /* profile and entropy coding mode */
+    ps_cfg->u4_entropy_coding_mode = ps_profile_ip->s_ive_ip.u4_entropy_coding_mode;
+    ps_cfg->e_profile = ps_profile_ip->s_ive_ip.e_profile;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets disable deblock level
+*
+* @par Description:
+*  Sets disable deblock level. Level 0 means no disabling and level 4 means
+*  disable completely. 1, 2, 3 are intermediate levels that control amount
+*  of deblocking done.
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 isvc_set_deblock_params(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* typed views of the opaque control call arguments */
+    isvce_ctl_set_deblock_params_op_t *ps_deblk_op = pv_api_op;
+    isvce_ctl_set_deblock_params_ip_t *ps_deblk_ip = pv_api_ip;
+
+    /* nothing to report */
+    ps_deblk_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_deblk_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_deblk_ip->s_ive_ip.u4_timestamp_high;
+
+    /* requested level */
+    ps_cfg->u4_disable_deblock_level = ps_deblk_ip->s_ive_ip.u4_disable_deblock_level;
+
+    return IV_SUCCESS;
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Sets vui params
+ *
+ * @par Description:
+ *  Copies the video usability information fields of the input structure to
+ *  the vui structure held in the config
+ *
+ * @param[in] pv_api_ip
+ *  Pointer to input argument structure
+ *
+ * @param[out] pv_api_op
+ *  Pointer to output argument structure
+ *
+ * @param[out] ps_cfg
+ *  Pointer to config structure to be updated
+ *
+ * @returns error status
+ *
+ * @remarks The time stamp fields of the config are not written by this call
+ *
+ *******************************************************************************
+ */
+static WORD32 isvce_set_vui_params(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* source, destination and status of the copy */
+    isvce_vui_ip_t *ps_src = pv_api_ip;
+    vui_t *ps_dst = &ps_cfg->s_vui;
+    isvce_vui_op_t *ps_status = pv_api_op;
+
+    ps_status->u4_error_code = 0;
+
+    /* aspect ratio */
+    ps_dst->u1_aspect_ratio_info_present_flag = ps_src->u1_aspect_ratio_info_present_flag;
+    ps_dst->u1_aspect_ratio_idc = ps_src->u1_aspect_ratio_idc;
+    ps_dst->u2_sar_width = ps_src->u2_sar_width;
+    ps_dst->u2_sar_height = ps_src->u2_sar_height;
+
+    /* overscan */
+    ps_dst->u1_overscan_info_present_flag = ps_src->u1_overscan_info_present_flag;
+    ps_dst->u1_overscan_appropriate_flag = ps_src->u1_overscan_appropriate_flag;
+
+    /* video signal type and colour description */
+    ps_dst->u1_video_signal_type_present_flag = ps_src->u1_video_signal_type_present_flag;
+    ps_dst->u1_video_format = ps_src->u1_video_format;
+    ps_dst->u1_video_full_range_flag = ps_src->u1_video_full_range_flag;
+    ps_dst->u1_colour_description_present_flag = ps_src->u1_colour_description_present_flag;
+    ps_dst->u1_colour_primaries = ps_src->u1_colour_primaries;
+    ps_dst->u1_transfer_characteristics = ps_src->u1_transfer_characteristics;
+    ps_dst->u1_matrix_coefficients = ps_src->u1_matrix_coefficients;
+
+    /* chroma sample location */
+    ps_dst->u1_chroma_loc_info_present_flag = ps_src->u1_chroma_loc_info_present_flag;
+    ps_dst->u1_chroma_sample_loc_type_top_field = ps_src->u1_chroma_sample_loc_type_top_field;
+    ps_dst->u1_chroma_sample_loc_type_bottom_field =
+        ps_src->u1_chroma_sample_loc_type_bottom_field;
+
+    /* timing info */
+    ps_dst->u1_vui_timing_info_present_flag = ps_src->u1_vui_timing_info_present_flag;
+    ps_dst->u4_vui_num_units_in_tick = ps_src->u4_vui_num_units_in_tick;
+    ps_dst->u4_vui_time_scale = ps_src->u4_vui_time_scale;
+    ps_dst->u1_fixed_frame_rate_flag = ps_src->u1_fixed_frame_rate_flag;
+
+    /* hrd and pic struct flags */
+    ps_dst->u1_nal_hrd_parameters_present_flag = ps_src->u1_nal_hrd_parameters_present_flag;
+    ps_dst->u1_vcl_hrd_parameters_present_flag = ps_src->u1_vcl_hrd_parameters_present_flag;
+    ps_dst->u1_low_delay_hrd_flag = ps_src->u1_low_delay_hrd_flag;
+    ps_dst->u1_pic_struct_present_flag = ps_src->u1_pic_struct_present_flag;
+
+    /* bitstream restriction */
+    ps_dst->u1_bitstream_restriction_flag = ps_src->u1_bitstream_restriction_flag;
+    ps_dst->u1_motion_vectors_over_pic_boundaries_flag =
+        ps_src->u1_motion_vectors_over_pic_boundaries_flag;
+    ps_dst->u1_max_bytes_per_pic_denom = ps_src->u1_max_bytes_per_pic_denom;
+    ps_dst->u1_max_bits_per_mb_denom = ps_src->u1_max_bits_per_mb_denom;
+    ps_dst->u1_log2_max_mv_length_horizontal = ps_src->u1_log2_max_mv_length_horizontal;
+    ps_dst->u1_log2_max_mv_length_vertical = ps_src->u1_log2_max_mv_length_vertical;
+    ps_dst->u1_num_reorder_frames = ps_src->u1_num_reorder_frames;
+    ps_dst->u1_max_dec_frame_buffering = ps_src->u1_max_dec_frame_buffering;
+
+    return IV_SUCCESS;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Sets Mastering display color volume sei params
+ *
+ * @par Description:
+ *  Copies the present flag, NUM_SEI_MDCV_PRIMARIES display primaries, the
+ *  white point, the max and min display mastering luminance and the time
+ *  stamp of the request from the input structure to the config
+ *
+ * @param[in] pv_api_ip
+ *  Pointer to input argument structure
+ *
+ * @param[out] pv_api_op
+ *  Pointer to output argument structure
+ *
+ * @param[out] ps_cfg
+ *  Pointer to config structure to be updated
+ *
+ * @return error status
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+static WORD32 isvce_set_sei_mdcv_params(void *pv_api_ip, void *pv_api_op,
+                                        isvce_cfg_params_t *ps_cfg)
+{
+    /* source, destination and status of the copy */
+    isvce_ctl_set_sei_mdcv_params_ip_t *ps_src = pv_api_ip;
+    sei_params_t *ps_dst = &ps_cfg->s_sei;
+    isvce_ctl_set_sei_mdcv_params_op_t *ps_status = pv_api_op;
+    WORD32 i4_primary;
+
+    ps_status->u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_src->u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_src->u4_timestamp_high;
+
+    ps_dst->u1_sei_mdcv_params_present_flag = ps_src->u1_sei_mdcv_params_present_flag;
+
+    /* x and y of every display primary */
+    for(i4_primary = 0; i4_primary < NUM_SEI_MDCV_PRIMARIES; i4_primary++)
+    {
+        ps_dst->s_sei_mdcv_params.au2_display_primaries_y[i4_primary] =
+            ps_src->au2_display_primaries_y[i4_primary];
+        ps_dst->s_sei_mdcv_params.au2_display_primaries_x[i4_primary] =
+            ps_src->au2_display_primaries_x[i4_primary];
+    }
+
+    /* white point */
+    ps_dst->s_sei_mdcv_params.u2_white_point_x = ps_src->u2_white_point_x;
+    ps_dst->s_sei_mdcv_params.u2_white_point_y = ps_src->u2_white_point_y;
+
+    /* luminance range */
+    ps_dst->s_sei_mdcv_params.u4_min_display_mastering_luminance =
+        ps_src->u4_min_display_mastering_luminance;
+    ps_dst->s_sei_mdcv_params.u4_max_display_mastering_luminance =
+        ps_src->u4_max_display_mastering_luminance;
+
+    return IV_SUCCESS;
+}
+
+/**
+ 
*******************************************************************************
+ *
+ * @brief
+ *  Sets content light level sei params
+ *
+ * @par Description:
+ *  Copies the present flag, the max content light level, the max picture
+ *  average light level and the time stamp of the request from the input
+ *  structure to the config
+ *
+ * @param[in] pv_api_ip
+ *  Pointer to input argument structure
+ *
+ * @param[out] pv_api_op
+ *  Pointer to output argument structure
+ *
+ * @param[out] ps_cfg
+ *  Pointer to config structure to be updated
+ *
+ * @return error status
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+static WORD32 isvce_set_sei_cll_params(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* source, destination and status of the copy */
+    isvce_ctl_set_sei_cll_params_ip_t *ps_src = pv_api_ip;
+    sei_params_t *ps_dst = &ps_cfg->s_sei;
+    isvce_ctl_set_sei_cll_params_op_t *ps_status = pv_api_op;
+
+    ps_status->u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_src->u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_src->u4_timestamp_high;
+
+    ps_dst->u1_sei_cll_params_present_flag = ps_src->u1_sei_cll_params_present_flag;
+
+    ps_dst->s_sei_cll_params.u2_max_pic_average_light_level =
+        ps_src->u2_max_pic_average_light_level;
+    ps_dst->s_sei_cll_params.u2_max_content_light_level = ps_src->u2_max_content_light_level;
+
+    return IV_SUCCESS;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Sets ambient viewing environment sei params
+ *
+ * @par Description:
+ *  Copies the present flag, the ambient illuminance, the ambient light x and
+ *  y and the time stamp of the request from the input structure to the
+ *  config
+ *
+ * @param[in] pv_api_ip
+ *  Pointer to input argument structure
+ *
+ * @param[out] pv_api_op
+ *  Pointer to output argument structure
+ *
+ * @param[out] ps_cfg
+ *  Pointer to config structure to be updated
+ *
+ * @return error status
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+static WORD32 isvce_set_sei_ave_params(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* source, destination and status of the copy */
+    isvce_ctl_set_sei_ave_params_ip_t *ps_src = pv_api_ip;
+    sei_params_t *ps_dst = &ps_cfg->s_sei;
+    isvce_ctl_set_sei_ave_params_op_t *ps_status = pv_api_op;
+
+    ps_status->u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_src->u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_src->u4_timestamp_high;
+
+    ps_dst->u1_sei_ave_params_present_flag = ps_src->u1_sei_ave_params_present_flag;
+
+    ps_dst->s_sei_ave_params.u2_ambient_light_x = ps_src->u2_ambient_light_x;
+    ps_dst->s_sei_ave_params.u2_ambient_light_y = ps_src->u2_ambient_light_y;
+    ps_dst->s_sei_ave_params.u4_ambient_illuminance = ps_src->u4_ambient_illuminance;
+
+    return IV_SUCCESS;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Sets content color volume sei params
+ *
+ * @par Description:
+ *  Copies the present flag, the ccv flags, NUM_SEI_CCV_PRIMARIES ccv
+ *  primaries, the min, max and avg luminance values and the time stamp of
+ *  the request from the input structure to the config
+ *
+ * @param[in] pv_api_ip
+ *  Pointer to input argument structure
+ *
+ * @param[out] pv_api_op
+ *  Pointer to output argument structure
+ *
+ * @param[out] ps_cfg
+ *  Pointer to config structure to be updated
+ *
+ * @return error status
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+static WORD32 isvce_set_sei_ccv_params(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* source, destination and status of the copy */
+    isvce_ctl_set_sei_ccv_params_ip_t *ps_src = pv_api_ip;
+    sei_params_t *ps_dst = &ps_cfg->s_sei;
+    isvce_ctl_set_sei_ccv_params_op_t *ps_status = pv_api_op;
+    WORD32 i4_primary;
+
+    ps_status->u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_src->u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_src->u4_timestamp_high;
+
+    ps_dst->u1_sei_ccv_params_present_flag = ps_src->u1_sei_ccv_params_present_flag;
+
+    /* flags */
+    ps_dst->s_sei_ccv_params.u1_ccv_cancel_flag = ps_src->u1_ccv_cancel_flag;
+    ps_dst->s_sei_ccv_params.u1_ccv_persistence_flag = ps_src->u1_ccv_persistence_flag;
+    ps_dst->s_sei_ccv_params.u1_ccv_primaries_present_flag = ps_src->u1_ccv_primaries_present_flag;
+    ps_dst->s_sei_ccv_params.u1_ccv_min_luminance_value_present_flag =
+        ps_src->u1_ccv_min_luminance_value_present_flag;
+    ps_dst->s_sei_ccv_params.u1_ccv_max_luminance_value_present_flag =
+        ps_src->u1_ccv_max_luminance_value_present_flag;
+    ps_dst->s_sei_ccv_params.u1_ccv_avg_luminance_value_present_flag =
+        ps_src->u1_ccv_avg_luminance_value_present_flag;
+    ps_dst->s_sei_ccv_params.u1_ccv_reserved_zero_2bits = ps_src->u1_ccv_reserved_zero_2bits;
+
+    /* x and y of every ccv primary */
+    for(i4_primary = 0; i4_primary < NUM_SEI_CCV_PRIMARIES; i4_primary++)
+    {
+        ps_dst->s_sei_ccv_params.ai4_ccv_primaries_y[i4_primary] =
+            ps_src->ai4_ccv_primaries_y[i4_primary];
+        ps_dst->s_sei_ccv_params.ai4_ccv_primaries_x[i4_primary] =
+            ps_src->ai4_ccv_primaries_x[i4_primary];
+    }
+
+    /* luminance values */
+    ps_dst->s_sei_ccv_params.u4_ccv_avg_luminance_value = ps_src->u4_ccv_avg_luminance_value;
+    ps_dst->s_sei_ccv_params.u4_ccv_max_luminance_value = ps_src->u4_ccv_max_luminance_value;
+    ps_dst->s_sei_ccv_params.u4_ccv_min_luminance_value = ps_src->u4_ccv_min_luminance_value;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets number of cores
+*
+* @par Description:
+*  Stores the requested number of cores, and the time stamp of the request,
+*  in the config
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks The number of encoder threads is limited to MAX_PROCESS_THREADS
+*
+*******************************************************************************
+*/
+static WORD32 isvce_set_num_cores(void *pv_api_ip, void *pv_api_op, isvce_cfg_params_t *ps_cfg)
+{
+    /* typed views of the opaque control call arguments */
+    isvce_ctl_set_num_cores_op_t *ps_cores_op = pv_api_op;
+    isvce_ctl_set_num_cores_ip_t *ps_cores_ip = pv_api_ip;
+
+    /* nothing to report */
+    ps_cores_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp carried by the request */
+    ps_cfg->u4_timestamp_low = ps_cores_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_cores_ip->s_ive_ip.u4_timestamp_high;
+
+    /* a request above the limit is brought down to the limit */
+    ps_cfg->u4_num_cores = MIN(ps_cores_ip->s_ive_ip.u4_num_cores, MAX_PROCESS_THREADS);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Resets encoder state
+*
+* @par Description:
+*  Resets encoder state by calling isvce_init()
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure; not used
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks IV_SUCCESS is returned even when the codec handle is NULL; that
+*  case is reported only through the error code of the output structure
+*
+*******************************************************************************
+*/
+static WORD32 isvce_reset(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
+{
+    isvce_ctl_reset_op_t *ps_reset_op = pv_api_op;
+    isvce_codec_t *ps_enc = (isvce_codec_t *) (ps_codec_obj->pv_codec_handle);
+
+    UNUSED(pv_api_ip);
+
+    ps_reset_op->s_ive_op.u4_error_code = 0;
+
+    if(NULL == ps_enc)
+    {
+        /* no codec context to initialise */
+        ps_reset_op->s_ive_op.u4_error_code = IH264E_INIT_NOT_DONE;
+    }
+    else
+    {
+        isvce_init(ps_enc);
+    }
+
+    return IV_SUCCESS;
+}
+
+/* Marks the output structure of the given control sub-command with the */
+/* fatal error bit and IH264E_INIT_NOT_DONE. pv_api_op is interpreted as */
+/* the output structure type that belongs to e_sub_cmd */
+static void isvce_ctl_set_error_code(void *pv_api_op, ISVCE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd)
+{
+/* op_type is the output structure type of the sub-command and err_field */
+/* is the path of the error code inside that structure */
+#define ISVCE_FLAG_INIT_NOT_DONE(op_type, err_field)                  \
+    do                                                                \
+    {                                                                 \
+        ((op_type *) pv_api_op)->err_field |= 1 << IVE_FATALERROR;    \
+        ((op_type *) pv_api_op)->err_field |= IH264E_INIT_NOT_DONE;   \
+    } while(0)
+
+    switch(e_sub_cmd)
+    {
+        case ISVCE_CMD_CTL_SET_DIMENSIONS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_dimensions_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_FRAMERATE:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_frame_rate_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_BITRATE:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_bitrate_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_FRAMETYPE:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_frame_type_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_QP:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_qp_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_ENC_MODE:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_enc_mode_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_VBV_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_vbv_params_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_AIR_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_air_params_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_ME_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_me_params_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_IPE_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_ipe_params_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_GOP_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_gop_params_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_PROFILE_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_profile_params_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_DEBLOCK_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_deblock_params_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_VUI_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_vui_op_t, u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_SEI_MDCV_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_sei_mdcv_params_op_t, u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_SEI_CLL_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_sei_cll_params_op_t, u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_SEI_AVE_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_sei_ave_params_op_t, u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_SEI_CCV_PARAMS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_sei_ccv_params_op_t, u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_RESET:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_reset_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SETDEFAULT:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_setdefault_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_FLUSH:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_flush_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_GETBUFINFO:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_getbufinfo_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_GETVERSION:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_getversioninfo_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_SET_NUM_CORES:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_set_num_cores_op_t, s_ive_op.u4_error_code);
+            break;
+
+        case ISVCE_CMD_CTL_GET_ENC_FRAME_DIMENSIONS:
+            ISVCE_FLAG_INIT_NOT_DONE(isvce_ctl_get_enc_dimensions_op_t, u4_error_code);
+            break;
+
+        default:
+        {
+            /* a sub-command without an entry above is not expected here */
+            ASSERT(0);
+        }
+    }
+
+#undef ISVCE_FLAG_INIT_NOT_DONE
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Codec control call
+*
+* @par Description:
+*  Codec control call which in turn calls appropriate calls based on
+*  sub-command
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[in] e_ctl_cmd
+*  Control sub-command to be served
+*
+* @returns error status
+*
+* @remarks none
+*
+******************************************************************************* +*/ +static WORD32 isvce_ctl(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op, + ISVCE_CONTROL_API_COMMAND_TYPE_T e_ctl_cmd) +{ + WORD32 i; + + isvce_codec_t *ps_codec = (isvce_codec_t *) ps_codec_obj->pv_codec_handle; + isvce_cfg_params_t *ps_cfg = NULL; + + IV_STATUS_T ret = IV_SUCCESS; + + /* control call is for configuring encoding params, this is not to be called + * before a successful init call */ + if(ps_codec->i4_init_done != 1) + { + isvce_ctl_set_error_code(pv_api_op, e_ctl_cmd); + + return IV_FAIL; + } + + /* make it thread safe */ + ithread_mutex_lock(ps_codec->pv_ctl_mutex); + + /* find a free config param set to hold current parameters */ + if(e_ctl_cmd != ISVCE_CMD_CTL_GET_ENC_FRAME_DIMENSIONS) + { + for(i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++) + { + if(0 == ps_codec->as_cfg[i].u4_is_valid) + { + ps_cfg = &ps_codec->as_cfg[i]; + break; + } + } + + /* If all are invalid, then start overwriting from the head config params */ + if(NULL == ps_cfg) + { + ps_cfg = &ps_codec->as_cfg[0]; + } + + ps_cfg->u4_is_valid = 1; + + ps_cfg->s_svc_params = ps_codec->s_cfg.s_svc_params; + ps_cfg->e_cmd = e_ctl_cmd; + } + + switch(e_ctl_cmd) + { + case ISVCE_CMD_CTL_SET_DIMENSIONS: + ret = isvce_set_dimensions(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_FRAMERATE: + ret = isvce_set_frame_rate(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_BITRATE: + ret = isvce_set_bit_rate(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_FRAMETYPE: + ret = isvce_set_frame_type(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_QP: + ret = isvce_set_qp(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_ENC_MODE: + ret = isvce_set_enc_mode(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_VBV_PARAMS: + ret = isvce_set_vbv_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case 
ISVCE_CMD_CTL_SET_AIR_PARAMS: + ret = isvc_set_air_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_ME_PARAMS: + ret = isvc_set_me_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_IPE_PARAMS: + ret = isvc_set_ipe_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_GOP_PARAMS: + ret = isvc_set_gop_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_PROFILE_PARAMS: + ret = isvc_set_profile_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_DEBLOCK_PARAMS: + ret = isvc_set_deblock_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_VUI_PARAMS: + ret = isvce_set_vui_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_SEI_MDCV_PARAMS: + ret = isvce_set_sei_mdcv_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_SEI_CLL_PARAMS: + ret = isvce_set_sei_cll_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_SEI_AVE_PARAMS: + ret = isvce_set_sei_ave_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_SET_SEI_CCV_PARAMS: + ret = isvce_set_sei_ccv_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_RESET: + + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + ret = isvce_reset(ps_codec_obj, pv_api_ip, pv_api_op); + break; + + case ISVCE_CMD_CTL_SETDEFAULT: + { + /* ctrl call I/O structures */ + isvce_ctl_setdefault_op_t *ps_op = pv_api_op; + + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + /* error status */ + ret = isvce_set_default_params(ps_cfg); + + ps_op->s_ive_op.u4_error_code = ret; + + break; + } + + case ISVCE_CMD_CTL_FLUSH: + + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + ret = isvce_set_flush_mode(ps_codec_obj, pv_api_ip, pv_api_op); + 
break; + + case ISVCE_CMD_CTL_GETBUFINFO: + + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + ret = isvce_get_buf_info(ps_codec_obj->pv_codec_handle, pv_api_ip, pv_api_op); + break; + + case ISVCE_CMD_CTL_GETVERSION: + { + /* ctrl call I/O structures */ + isvce_ctl_getversioninfo_ip_t *ps_ip = pv_api_ip; + isvce_ctl_getversioninfo_op_t *ps_op = pv_api_op; + + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + /* error status */ + ps_op->s_ive_op.u4_error_code = IV_SUCCESS; + + if(ps_ip->s_ive_ip.u4_version_bufsize <= 0) + { + ps_op->s_ive_op.u4_error_code = IH264E_CXA_VERS_BUF_INSUFFICIENT; + ret = IV_FAIL; + } + else + { + ret = ih264e_get_version((CHAR *) ps_ip->s_ive_ip.pu1_version, + ps_ip->s_ive_ip.u4_version_bufsize); + + if(ret != IV_SUCCESS) + { + ps_op->s_ive_op.u4_error_code = IH264E_CXA_VERS_BUF_INSUFFICIENT; + ret = IV_FAIL; + } + } + break; + } + + case ISVCE_CMD_CTL_SET_NUM_CORES: + ret = isvce_set_num_cores(pv_api_ip, pv_api_op, ps_cfg); + break; + + case ISVCE_CMD_CTL_GET_ENC_FRAME_DIMENSIONS: + { + ps_cfg = NULL; + + for(i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++) + { + if(ps_codec->as_cfg[i].u4_is_valid && + (ps_codec->as_cfg[i].e_cmd == ISVCE_CMD_CTL_SET_DIMENSIONS)) + { + ps_cfg = &ps_codec->as_cfg[i]; + + break; + } + } + + if(NULL == ps_cfg) + { + ((isvce_ctl_get_enc_dimensions_op_t *) pv_api_op)->u4_error_code |= + 1 << IVE_FATALERROR; + ((isvce_ctl_get_enc_dimensions_op_t *) pv_api_op)->u4_error_code |= + IH264E_WIDTH_NOT_SUPPORTED; + ((isvce_ctl_get_enc_dimensions_op_t *) pv_api_op)->u4_error_code |= + IH264E_HEIGHT_NOT_SUPPORTED; + + return IV_FAIL; + } + + ret = isvce_get_enc_frame_dimensions((isvce_ctl_get_enc_dimensions_ip_t *) pv_api_ip, + (isvce_ctl_get_enc_dimensions_op_t *) pv_api_op, + ps_cfg); + + break; + } + + default: + /* invalidate config param struct as it is being served right away */ + 
ps_codec->as_cfg[i].u4_is_valid = 0; + + DEBUG("Warning !! unrecognized control api command \n"); + break; + } + + ithread_mutex_unlock(ps_codec->pv_ctl_mutex); + + return ret; +} + +/** +******************************************************************************* +* +* @brief +* Codec entry point function. All the function calls to the codec are done +* using this function with different values specified in command +* +* @par Description: +* Arguments are tested for validity and then based on the command +* appropriate function is called +* +* @param[in] ps_handle +* API level handle for codec +* +* @param[in] pv_api_ip +* Input argument structure +* +* @param[out] pv_api_op +* Output argument structure +* +* @returns error_status +* +* @remarks +* +******************************************************************************* +*/ +IV_STATUS_T isvce_api_function(iv_obj_t *ps_handle, void *pv_api_ip, void *pv_api_op, + isvce_api_cmds_t *ps_iv_api_cmds) +{ + IV_STATUS_T e_status; + WORD32 ret; + + ISVCE_API_COMMAND_TYPE_T e_cmd = ps_iv_api_cmds->e_cmd; + ISVCE_CONTROL_API_COMMAND_TYPE_T e_ctl_cmd = ps_iv_api_cmds->e_ctl_cmd; + + /* validate input / output structures */ + e_status = api_check_struct_sanity(ps_handle, pv_api_ip, pv_api_op, ps_iv_api_cmds); + + if(e_status != IV_SUCCESS) + { + DEBUG("error code = %d\n", *((UWORD32 *) pv_api_op + 1)); + return IV_FAIL; + } + + switch(e_cmd) + { + case ISVCE_CMD_GET_NUM_MEM_REC: + ret = isvce_get_num_rec(pv_api_ip, pv_api_op); + break; + + case ISVCE_CMD_FILL_NUM_MEM_REC: + ret = isvce_fill_num_mem_rec(pv_api_ip, pv_api_op); + break; + + case ISVCE_CMD_INIT: + ret = isvce_init_mem_rec(ps_handle, pv_api_ip, pv_api_op); + break; + + case ISVCE_CMD_RETRIEVE_MEMREC: + ret = isvce_retrieve_memrec(ps_handle, pv_api_ip, pv_api_op); + break; + + case ISVCE_CMD_VIDEO_CTL: + ret = isvce_ctl(ps_handle, pv_api_ip, pv_api_op, e_ctl_cmd); + break; + + case ISVCE_CMD_VIDEO_ENCODE: + ret = isvce_encode(ps_handle, pv_api_ip, 
pv_api_op); + break; + + default: + ret = IV_FAIL; + break; + } + + return (IV_STATUS_T) ret; +} diff --git a/encoder/svc/isvce_cabac.c b/encoder/svc/isvce_cabac.c new file mode 100644 index 0000000..e36025e --- /dev/null +++ b/encoder/svc/isvce_cabac.c @@ -0,0 +1,753 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_cabac.c +* +* @brief +* Contains all leaf level functions for CABAC entropy coding. 
+* +* +* @author +* Doney Alex +* +* @par List of Functions: +* +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include +#include + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_debug.h" +#include "ih264_macros.h" +#include "isvc_defs.h" +#include "isvce_defs.h" +#include "isvc_macros.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_platform_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "isvce_cabac.h" +#include "isvce_encode_header.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_statistics.h" +#include "ih264e_trace.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** + ******************************************************************************* + * + * @brief + * k-th order Exp-Golomb (UEGk) binarization process: Implements concatenated + * unary/ k-th order Exp-Golomb (UEGk) binarization process, + * where k = 0 as defined in 9.3.2.3 of ITU_T_H264-201402 + * + * @param[in] i2_sufs 
+ *  Suffix bit string
+ *
+ * @param[out] pi1_bins_len
+ *  Receives the length (number of bins) of the returned string
+ *
+ * @returns Binarized value
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+UWORD32 isvce_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len)
+{
+    /* value that is actually coded: suffix + 1 (k = 0) */
+    const UWORD32 u4_value = i2_sufs + 1;
+
+    /* number of significant bits of the value; the extra term keeps the
+     * length at 1 when the value itself is 0 */
+    const WORD32 i4_num_bits = (32 - CLZ(u4_value) + (0 == u4_value));
+
+    /* the escape part carries every bit of the value below its leading one */
+    const WORD32 i4_escape_len = i4_num_bits - 1;
+
+    /* unary prefix: (i4_num_bits - 1) '1' bins followed by a terminating '0' */
+    const UWORD32 u4_prefix = (1 << i4_num_bits) - 2;
+
+    /* low order bits of the value that follow the prefix */
+    const UWORD32 u4_escape = u4_value & ((1 << i4_escape_len) - 1);
+
+    /* total length = 2 * (i4_num_bits - 1) + 1 + k, with k = 0 */
+    *pi1_bins_len = (2 * i4_num_bits) - 1;
+
+    return ((u4_prefix << i4_escape_len) | u4_escape);
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Get cabac context for the MB :calculates the pointers to Top and left
+ *  cabac neighbor context depending upon neighbor availability.
+ * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure + * + * @param[in] u4_mb_type + * Type of MB + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void isvce_get_cabac_context(isvce_entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type) +{ + /* CABAC context */ + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + isvce_mb_info_ctxt_t *ps_ctx_inc_mb_map; + cab_csbp_t *ps_lft_csbp; + + WORD32 i4_lft_avail, i4_top_avail, i4_is_intra; + WORD32 i4_mb_x, i4_mb_y; + UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx; + + i4_is_intra = ((u4_mb_type == I16x16) || (u4_mb_type == I8x8) || (u4_mb_type == I4x4)); + + /* derive neighbor availability */ + i4_mb_x = ps_ent_ctxt->i4_mb_x; + i4_mb_y = ps_ent_ctxt->i4_mb_y; + pu1_slice_idx += (i4_mb_y * ps_ent_ctxt->i4_wd_mbs); + /* left macroblock availability */ + i4_lft_avail = (i4_mb_x == 0 || (pu1_slice_idx[i4_mb_x - 1] != pu1_slice_idx[i4_mb_x])) ? 0 : 1; + /* top macroblock availability */ + i4_top_avail = (i4_mb_y == 0 || + (pu1_slice_idx[i4_mb_x - ps_ent_ctxt->i4_wd_mbs] != pu1_slice_idx[i4_mb_x])) + ? 
0 + : 1; + i4_mb_x = ps_ent_ctxt->i4_mb_x; + ps_ctx_inc_mb_map = ps_cabac_ctxt->ps_mb_map_ctxt_inc; + ps_cabac_ctxt->ps_curr_ctxt_mb_info = ps_ctx_inc_mb_map + i4_mb_x; + ps_cabac_ctxt->ps_left_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info; + ps_cabac_ctxt->ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info; + ps_lft_csbp = ps_cabac_ctxt->ps_lft_csbp; + ps_cabac_ctxt->pu1_left_y_ac_csbp = &ps_lft_csbp->u1_y_ac_csbp_top_mb; + ps_cabac_ctxt->pu1_left_uv_ac_csbp = &ps_lft_csbp->u1_uv_ac_csbp_top_mb; + ps_cabac_ctxt->pu1_left_yuv_dc_csbp = &ps_lft_csbp->u1_yuv_dc_csbp_top_mb; + ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc = &ps_cabac_ctxt->i1_left_ref_idx_ctx_inc_arr[0][0]; + ps_cabac_ctxt->pu1_left_mv_ctxt_inc = ps_cabac_ctxt->u1_left_mv_ctxt_inc_arr[0]; + + if(i4_lft_avail) ps_cabac_ctxt->ps_left_ctxt_mb_info = ps_cabac_ctxt->ps_curr_ctxt_mb_info - 1; + if(i4_top_avail) ps_cabac_ctxt->ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + if(!i4_lft_avail) + { + UWORD8 u1_def_csbp = i4_is_intra ? 0xf : 0; + *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = u1_def_csbp; + *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = u1_def_csbp; + *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = u1_def_csbp; + *((UWORD32 *) ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc) = 0; + memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16); + } + if(!i4_top_avail) + { + UWORD8 u1_def_csbp = i4_is_intra ? 0xff : 0; + ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_ac_csbp = u1_def_csbp; + ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_dc_csbp = u1_def_csbp; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[0] = + ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[1] = + ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[2] = + ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[3] = 0; + memset(ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv, 0, 16); + } +} + +/** + ******************************************************************************* + * @brief + * flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402). 
+ * + * @param[in] ps_cabac_ctxt + * pointer to cabac context (handle) + * + * @returns none + * + * @remarks + * None + * + ******************************************************************************* + */ +void isvce_cabac_flush(isvce_cabac_ctxt_t *ps_cabac_ctxt) +{ + /* bit stream ptr */ + bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm; + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env); + UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low; + UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen; + UWORD8 *pu1_strm_buf = ps_stream->pu1_strm_buffer; + UWORD32 u4_strm_buf_offset = ps_stream->u4_strm_buf_offset; + WORD32 zero_run = ps_stream->i4_zero_bytes_run; + UWORD32 u4_out_standing_bytes = ps_cab_enc_env->u4_out_standing_bytes; + + /************************************************************************/ + /* Insert the carry (propogated in previous byte) along with */ + /* outstanding bytes (if any) and flush remaining bits */ + /************************************************************************/ + { + /* carry = 1 => putbit(1); carry propogated due to L renorm */ + WORD32 carry = (u4_low >> (u4_bits_gen + CABAC_BITS)) & 0x1; + WORD32 last_byte; + WORD32 bits_left; + WORD32 rem_bits; + + if(carry) + { + /* CORNER CASE: if the previous data is 0x000003, then EPB will be + inserted and the data will become 0x00000303 and if the carry is present, + it will be added with the last byte and it will become 0x00000304 which + is not correct as per standard */ + /* so check for previous four bytes and if it is equal to 0x00000303 + then subtract u4_strm_buf_offset by 1 */ + if(pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03 && + pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03 && + pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00 && + pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00) + { + u4_strm_buf_offset -= 1; + } + /* previous byte carry add will not result in overflow to */ + /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes */ + 
pu1_strm_buf[u4_strm_buf_offset - 1] += carry; + zero_run = 0; + } + + /* Insert outstanding bytes (if any) */ + while(u4_out_standing_bytes) + { + UWORD8 u1_0_or_ff = carry ? 0 : 0xFF; + + PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run); + u4_out_standing_bytes--; + } + + /* clear the carry in low */ + u4_low &= ((1 << (u4_bits_gen + CABAC_BITS)) - 1); + + /* extract the remaining bits; */ + /* includes additional msb bit of low as per Figure 9-12 */ + bits_left = u4_bits_gen + 1; + rem_bits = (u4_low >> (u4_bits_gen + CABAC_BITS - bits_left)); + + if(bits_left >= 8) + { + last_byte = (rem_bits >> (bits_left - 8)) & 0xFF; + PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run); + bits_left -= 8; + } + + /* insert last byte along with rbsp stop bit(1) and 0's in the end */ + last_byte = + (rem_bits << (8 - bits_left)) | (1 << (7 - bits_left) | (1 << (7 - bits_left - 1))); + last_byte &= 0xFF; + PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run); + + /* update the state variables and return success */ + ps_stream->u4_strm_buf_offset = u4_strm_buf_offset; + ps_stream->i4_zero_bytes_run = 0; + /* Default init values for scratch variables of bitstream context */ + ps_stream->u4_cur_word = 0; + ps_stream->i4_bits_left_in_cw = WORD_SIZE; + } +} + +/** + ****************************************************************************** + * + * @brief Puts new byte (and outstanding bytes) into bitstream after cabac + * renormalization + * + * @par Description + * 1. Extract the leading byte of low(L) + * 2. If leading byte=0xff increment outstanding bytes and return + * (as the actual bits depend on carry propogation later) + * 3. If leading byte is not 0xff check for any carry propogation + * 4. 
Insert the carry (propogated in previous byte) along with outstanding
+ *     bytes (if any) and leading byte
+ *
+ *
+ * @param[in] ps_cabac_ctxt
+ *  pointer to cabac context (handle)
+ *
+ * @return
+ *
+ ******************************************************************************
+ */
+void isvce_cabac_put_byte(isvce_cabac_ctxt_t *ps_cabac_ctxt)
+{
+    /* bit stream ptr */
+    bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm;
+    encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env);
+    UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
+    UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen;
+    WORD32 lead_byte = u4_low >> (u4_bits_gen + CABAC_BITS - 8); /* bit 8, if set, is the carry */
+
+    /* Sanity checks: range must be normalized and a full byte must be pending */
+    ASSERT((ps_cab_enc_env->u4_code_int_range >= 256) && (ps_cab_enc_env->u4_code_int_range < 512));
+    ASSERT((u4_bits_gen >= 8));
+
+    /* update bits generated and low after extracting leading byte;
+       the mask also drops the carry bit from the stored low */
+    u4_bits_gen -= 8;
+    ps_cab_enc_env->u4_code_int_low &= ((1 << (CABAC_BITS + u4_bits_gen)) - 1);
+    ps_cab_enc_env->u4_bits_gen = u4_bits_gen;
+
+    /************************************************************************/
+    /* 1. Extract the leading byte of low(L)                                */
+    /* 2. If leading byte=0xff increment outstanding bytes and return       */
+    /*    (as the actual bits depend on carry propagation later)            */
+    /* 3. If leading byte is not 0xff check for any carry propagation       */
+    /* 4. Insert the carry (propagated in previous byte) along with         */
+    /*    outstanding bytes (if any) and leading byte                       */
+    /************************************************************************/
+    if(lead_byte == 0xff)
+    {
+        /* actual bits depend on carry propagation; nothing is written yet */
+        ps_cab_enc_env->u4_out_standing_bytes++;
+        return;
+    }
+    else
+    {
+        UWORD8 *pu1_strm_buf = ps_stream->pu1_strm_buffer;
+        UWORD32 u4_strm_buf_offset = ps_stream->u4_strm_buf_offset;
+        /* carry = 1 => putbit(1); carry propagated due to L renorm */
+        WORD32 carry = (lead_byte >> 8) & 0x1;
+        WORD32 zero_run = ps_stream->i4_zero_bytes_run;
+        UWORD32 u4_out_standing_bytes = ps_cab_enc_env->u4_out_standing_bytes;
+
+        /*********************************************************************/
+        /*        Insert the carry propagated in previous byte               */
+        /*                                                                   */
+        /* Note : Do not worry about corruption into slice header align byte */
+        /*        This is because the first bin cannot result in overflow    */
+        /*********************************************************************/
+        if(carry)
+        {
+            /* CORNER CASE: if the previous data is 0x000003, then EPB will be
+               inserted and the data will become 0x00000303 and if the carry is present,
+               it will be added with the last byte and it will become 0x00000304 which
+               is not correct as per standard */
+            /* so check for previous four bytes and if it is equal to 0x00000303
+               then subtract u4_strm_buf_offset by 1 (the offset check keeps the
+               look-back inside the bytes already written) */
+            if((u4_strm_buf_offset > 3) && (pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03) &&
+               (pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03) &&
+               (pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00) &&
+               (pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00))
+            {
+                u4_strm_buf_offset -= 1;
+            }
+
+            /* previous byte carry add will not result in overflow to        */
+            /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes  */
+            if(u4_strm_buf_offset > 0)
+            {
+                pu1_strm_buf[u4_strm_buf_offset - 1] += carry;
+                zero_run = 0;
+            }
+        }
+
+        /* Insert outstanding bytes (if any): the deferred 0xFF bytes become
+           0x00 when the carry rippled through them */
+        while(u4_out_standing_bytes)
+        {
+            UWORD8 u1_0_or_ff = carry ? 0 : 0xFF;
+
+            PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run);
+
+            u4_out_standing_bytes--;
+        }
+        ps_cab_enc_env->u4_out_standing_bytes = 0;
+
+        /* Insert the leading byte (carry bit stripped) */
+        lead_byte &= 0xFF;
+        PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, lead_byte, zero_run);
+
+        /* update the state variables and return success */
+        ps_stream->u4_strm_buf_offset = u4_strm_buf_offset;
+        ps_stream->i4_zero_bytes_run = zero_run;
+    }
+}
+
+/**
+******************************************************************************
+*
+*  @brief Codes a bin based on probability and mps packed context model
+*
+*  @par   Description
+*  1. Apart from encoding bin, context model is updated as per state transition
+*  2. Range and Low renormalization is done based on bin and original state
+*  3. After renorm bitstream is updated (if required)
+*
+*  @param[inout]   ps_cabac
+*  pointer to cabac context (handle); range, low and bits generated are updated
+*
+*  @param[in]   bin
+*  bin(boolean) to be encoded
+*
+*  @param[inout]   pu1_bin_ctxts
+*  pointer to the cabac context model containing pState[bits 5-0] | MPS[bit6];
+*  the model is read and then overwritten with the updated state
+*
+*  @return
+*
+*  @remarks
+*  Each gau4_isvc_cabac_table entry is unpacked below as: bits 7-0 the LPS
+*  range, bits 13-8 the next state after an MPS, bits 20-15 the next state
+*  after an LPS.
+*
+******************************************************************************
+*/
+void isvce_cabac_encode_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin, bin_ctxt_model *pu1_bin_ctxts)
+{
+    encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env);
+    UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range;
+    UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
+    UWORD32 u4_rlps;
+    UWORD8 state_mps = (*pu1_bin_ctxts) & 0x3F;
+    UWORD8 u1_mps = !!((*pu1_bin_ctxts) & (0x40));
+    WORD32 shift;
+    UWORD32 u4_table_val;
+    /* Sanity checks */
+    ASSERT((bin == 0) || (bin == 1));
+    ASSERT((u4_range >= 256) && (u4_range < 512));
+
+    /* Get the lps range from LUT based on quantized range and state */
+    u4_table_val = gau4_isvc_cabac_table[state_mps][(u4_range >> 6) & 0x3];
+    u4_rlps = u4_table_val & 0xFF;
+    u4_range -= u4_rlps;
+
+    /* check if bin is mps or lps */
+    if(u1_mps ^ bin)
+    {
+        /* lps path;  L= L + R; R = RLPS */
+        u4_low += u4_range;
+        u4_range = u4_rlps;
+        if(state_mps == 0)
+        {
+            /* MPS(CtxIdx) = 1 - MPS(CtxIdx) */
+            u1_mps = 1 - u1_mps;
+        } /* update the context model from state transition LUT (LPS column) */
+
+        state_mps = (u4_table_val >> 15) & 0x3F;
+    }
+    else
+    { /* update the context model from state transition LUT (MPS column) */
+        state_mps = (u4_table_val >> 8) & 0x3F;
+    }
+
+    (*pu1_bin_ctxts) = (u1_mps << 6) | state_mps;
+
+    /*****************************************************************/
+    /* Renormalization; calculate bits generated based on range(R)   */
+    /* Note : 6 <= R < 512; R is 2 only for terminating encode       */
+    /*****************************************************************/
+    GETRANGE(shift, u4_range);
+    shift = 9 - shift;
+    u4_low <<= shift;
+    u4_range <<= shift;
+
+    /* bits to be inserted in the bitstream */
+    ps_cab_enc_env->u4_bits_gen += shift;
+    ps_cab_enc_env->u4_code_int_range = u4_range;
+    ps_cab_enc_env->u4_code_int_low = u4_low;
+
+    /* generate stream when a byte is ready */
+    if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
+    {
+        isvce_cabac_put_byte(ps_cabac);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Encoding process for a binary decision :implements encoding process of a
+decision
+*  as defined in 9.3.4.2 . This function encodes multiple bins, of a symbol.
+Implements
+*  flowchart Figure 9-7( ITU_T_H264-201402)
+*
+* @param[in] u4_bins
+* array of bin values
+*
+* @param[in] i1_bins_len
+*  Length of bins, maximum 32
+*
+* @param[in] u4_ctx_inc
+*  CtxInc, byte0- bin0, byte1-bin1 ..
+* +* @param[in] i1_valid_len +* valid length of bins, after that CtxInc is constant +* +* @param[in] pu1_bin_ctxt_type +* Pointer to binary contexts + +* @param[in] ps_cabac +* Pointer to cabac_context_structure +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ +void isvce_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len, UWORD32 u4_ctx_inc, + WORD8 i1_valid_len, bin_ctxt_model *pu1_bin_ctxt_type, + isvce_cabac_ctxt_t *ps_cabac) +{ + WORD8 i; + UWORD8 u1_ctx_inc, u1_bin; + + for(i = 0; i < i1_bins_len; i++) + { + u1_bin = (u4_bins & 0x01); + u4_bins = u4_bins >> 1; + u1_ctx_inc = u4_ctx_inc & 0x0f; + if(i < i1_valid_len) u4_ctx_inc = u4_ctx_inc >> 4; + /* Encode the bin */ + isvce_cabac_encode_bin(ps_cabac, u1_bin, pu1_bin_ctxt_type + u1_ctx_inc); + } +} + +/** + ******************************************************************************* + * @brief + * Encoding process for a binary decision before termination:Encoding process + * of a termination(9.3.4.5 :ITU_T_H264-201402) . Explained in flowchart 9-11. 
+ * + * @param[in] ps_cabac + * Pointer to cabac structure + * + * @param[in] term_bin + * Symbol value, end of slice or not, term_bin is binary + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void isvce_cabac_encode_terminate(isvce_cabac_ctxt_t *ps_cabac, WORD32 term_bin) +{ + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env); + + UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range; + UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low; + UWORD32 u4_rlps; + WORD32 shift; + + /* Sanity checks */ + ASSERT((u4_range >= 256) && (u4_range < 512)); + ASSERT((term_bin == 0) || (term_bin == 1)); + + /* term_bin = 1 has lps range = 2 */ + u4_rlps = 2; + u4_range -= u4_rlps; + + /* if terminate L is incremented by curR and R=2 */ + if(term_bin) + { + /* lps path; L= L + R; R = RLPS */ + u4_low += u4_range; + u4_range = u4_rlps; + } + + /*****************************************************************/ + /* Renormalization; calculate bits generated based on range(R) */ + /* Note : 6 <= R < 512; R is 2 only for terminating encode */ + /*****************************************************************/ + GETRANGE(shift, u4_range); + shift = 9 - shift; + u4_low <<= shift; + u4_range <<= shift; + + /* bits to be inserted in the bitstream */ + ps_cab_enc_env->u4_bits_gen += shift; + ps_cab_enc_env->u4_code_int_range = u4_range; + ps_cab_enc_env->u4_code_int_low = u4_low; + + /* generate stream when a byte is ready */ + if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + isvce_cabac_put_byte(ps_cabac); + } + + if(term_bin) + { + isvce_cabac_flush(ps_cabac); + } +} + +/** + ******************************************************************************* + * @brief + * Bypass encoding process for binary decisions: Explained (9.3.4.4 + *:ITU_T_H264-201402) , flowchart 9-10. 
+ * + * @param[ino] ps_cabac : pointer to cabac context (handle) + * + * @param[in] bin : bypass bin(0/1) to be encoded + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +void isvce_cabac_encode_bypass_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin) +{ + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env); + + UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range; + UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low; + + /* Sanity checks */ + ASSERT((u4_range >= 256) && (u4_range < 512)); + ASSERT((bin == 0) || (bin == 1)); + + u4_low <<= 1; + /* add range if bin is 1 */ + if(bin) + { + u4_low += u4_range; + } + + /* 1 bit to be inserted in the bitstream */ + ps_cab_enc_env->u4_bits_gen++; + ps_cab_enc_env->u4_code_int_low = u4_low; + + /* generate stream when a byte is ready */ + if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + isvce_cabac_put_byte(ps_cabac); + } +} + +/** +****************************************************************************** +* +* @brief Encodes a series of bypass bins (FLC bypass bins) +* +* @par Description +* This function is more optimal than calling isvce_cabac_encode_bypass_bin() +* in a loop as cabac low, renorm and generating the stream (8bins at a time) +* can be done in one operation +* +* @param[inout]ps_cabac +* pointer to cabac context (handle) +* +* @param[in] u4_bins +* syntax element to be coded (as FLC bins) +* +* @param[in] num_bins +* This is the FLC length for u4_sym +* +* @return +* +****************************************************************************** +*/ + +void isvce_cabac_encode_bypass_bins(isvce_cabac_ctxt_t *ps_cabac, UWORD32 u4_bins, WORD32 num_bins) +{ + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env); + + UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range; + WORD32 next_byte; + + /* Sanity checks */ + ASSERT((num_bins < 33) && (num_bins > 0)); + ASSERT((u4_range >= 256) && 
(u4_range < 512)); + + /* Compute bit always to populate the trace */ + /* increment bits generated by num_bins */ + + /* Encode 8bins at a time and put in the bit-stream */ + while(num_bins > 8) + { + num_bins -= 8; + + next_byte = (u4_bins >> (num_bins)) & 0xff; + + /* L = (L << 8) + (R * next_byte) */ + ps_cab_enc_env->u4_code_int_low <<= 8; + ps_cab_enc_env->u4_code_int_low += (next_byte * u4_range); + ps_cab_enc_env->u4_bits_gen += 8; + + if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + /* insert the leading byte of low into stream */ + isvce_cabac_put_byte(ps_cabac); + } + } + + /* Update low with remaining bins and return */ + next_byte = (u4_bins & ((1 << num_bins) - 1)); + + ps_cab_enc_env->u4_code_int_low <<= num_bins; + ps_cab_enc_env->u4_code_int_low += (next_byte * u4_range); + ps_cab_enc_env->u4_bits_gen += num_bins; + + if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + /* insert the leading byte of low into stream */ + isvce_cabac_put_byte(ps_cabac); + } +} diff --git a/encoder/svc/isvce_cabac.h b/encoder/svc/isvce_cabac.h new file mode 100644 index 0000000..57ce5d6 --- /dev/null +++ b/encoder/svc/isvce_cabac.h @@ -0,0 +1,380 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+ *******************************************************************************
+ * @file
+ *  isvce_cabac.h
+ *
+ * @brief
+ *  This file contains cabac related function declarations.
+ *
+ * @author
+ *  Doney Alex
+ *
+ * @remarks
+ *  none
+ *
+ *******************************************************************************
+ */
+
+#ifndef _ISVCE_CABAC_H_
+#define _ISVCE_CABAC_H_
+
+#include "ih264e_cabac.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_defs.h"
+#include "isvce_structs.h"
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Initialize default context values and pointers.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure
+ *
+ * @returns
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_init_cabac_table(isvce_entropy_ctxt_t *ps_ent_ctxt);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Initialize cabac context: Initialize all contexts with init values given in
+ *  the spec. Called at the beginning of entropy coding of each slice for CABAC
+ *  encoding.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure
+ *
+ * @param[in] ps_slice_hdr
+ *  Pointer to the slice header of the slice being coded (how it is used is
+ *  defined by the implementation, which is not part of this header)
+ *
+ * @returns
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+extern void isvce_init_cabac_ctxt(isvce_entropy_ctxt_t *ps_ent_ctxt, slice_header_t *ps_slice_hdr);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  k-th order Exp-Golomb (UEGk) binarization process: Implements concatenated
+ *  unary/ k-th order Exp-Golomb  (UEGk) binarization process,
+ *  where k = 0 as defined in 9.3.2.3 of  ITU_T_H264-201402
+ *
+ * @param[in] i2_sufs
+ *  Suffix bit string
+ *
+ * @param[out] pi1_bins_len
+ *  Pointer that receives the length of the string
+ *
+ * @returns Binarized value
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+UWORD32 isvce_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Get cabac context for the MB :calculates the pointers to Top and left
+ *  cabac neighbor context depending upon neighbor availability.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure
+ *
+ * @param[in] u4_mb_type
+ *  Type of MB
+ *
+ * @returns
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_get_cabac_context(isvce_entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type);
+
+/**
+ *******************************************************************************
+ * @brief
+ *  flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402).
+ * + * @param[in] ps_cabac_ctxt + * pointer to cabac context (handle) + * + * @returns none + * + * @remarks + * None + * + ******************************************************************************* + */ +void isvce_cabac_flush(isvce_cabac_ctxt_t *ps_cabac_ctxt); + +/** + ****************************************************************************** + * + * @brief Puts new byte (and outstanding bytes) into bitstream after cabac + * renormalization + * + * @par Description + * 1. Extract the leading byte of low(L) + * 2. If leading byte=0xff increment outstanding bytes and return + * (as the actual bits depend on carry propagation later) + * 3. If leading byte is not 0xff check for any carry propagation + * 4. Insert the carry (propagated in previous byte) along with outstanding + * bytes (if any) and leading byte + * + * + * @param[inout] ps_cabac_ctxt + * pointer to cabac context (handle) + * + * @return + * + ****************************************************************************** + */ +void isvce_cabac_put_byte(isvce_cabac_ctxt_t *ps_cabac_ctxt); + +/** + ****************************************************************************** + * + * @brief Codes a bin based on probability and mps packed context model + * + * @par Description + * 1. Apart from encoding bin, context model is updated as per state transition + * 2. Range and Low renormalization is done based on bin and original state + * 3. 
After renorm bitstream is updated (if required) + * + * @param[inout] ps_cabac + * pointer to cabac context (handle) + * + * @param[in] bin + * bin(boolean) to be encoded + * + * @param[in] pu1_bin_ctxts + * index of cabac context model containing pState[bits 5-0] | MPS[bit6] + * + * @return + * + ****************************************************************************** + */ +void isvce_cabac_encode_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin, + bin_ctxt_model *pu1_bin_ctxts); + +/** + ******************************************************************************* + * + * @brief + * Encoding process for a binary decision :implements encoding process of a + decision + * as defined in 9.3.4.2 . This function encodes multiple bins, of a symbol. + Implements + * flowchart Figure 9-7( ITU_T_H264-201402) + * + * @param[in] u4_bins + * array of bin values + * + * @param[in] i1_bins_len + * Length of bins, maximum 32 + * + * @param[in] u4_ctx_inc + * CtxInc, byte0- bin0, byte1-bin1 .. + * + * @param[in] i1_valid_len + * valid length of bins, after that CtxInc is constant + * + * @param[in] pu1_bin_ctxt_type + * Pointer to binary contexts + + * @param[in] ps_cabac + * Pointer to cabac_context_structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void isvce_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len, UWORD32 u4_ctx_inc, + WORD8 i1_valid_len, bin_ctxt_model *pu1_bin_ctxt_type, + isvce_cabac_ctxt_t *ps_cabac); + +/** + ******************************************************************************* + * @brief + * Encoding process for a binary decision before termination:Encoding process + * of a termination(9.3.4.5 :ITU_T_H264-201402) . Explained in flowchart 9-11. 
+ * + * @param[in] ps_cabac + * Pointer to cabac structure + * + * @param[in] term_bin + * Symbol value, end of slice or not, term_bin is binary + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void isvce_cabac_encode_terminate(isvce_cabac_ctxt_t *ps_cabac, WORD32 term_bin); + +/** + ******************************************************************************* + * @brief + * Bypass encoding process for binary decisions: Explained (9.3.4.4 + *:ITU_T_H264-201402) , flowchart 9-10. + * + * @param[in] ps_cabac : pointer to cabac context (handle) + * + * @param[in] bin : bypass bin(0/1) to be encoded + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +void isvce_cabac_encode_bypass_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin); + +/** + ****************************************************************************** + * + * @brief Encodes a series of bypass bins (FLC bypass bins) + * + * @par Description + * This function is more optimal than calling isvce_cabac_encode_bypass_bin() + * in a loop as cabac low, renorm and generating the stream (8bins at a time) + * can be done in one operation + * + * @param[inout]ps_cabac + * pointer to cabac context (handle) + * + * @param[in] u4_bins + * syntax element to be coded (as FLC bins) + * + * @param[in] num_bins + * This is the FLC length for u4_bins + * + * @return + * + ****************************************************************************** + */ + +void isvce_cabac_encode_bypass_bins(isvce_cabac_ctxt_t *ps_cabac, UWORD32 u4_bins, WORD32 num_bins); + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for an Intra Slice. 
+ * + * @description + * The mb syntax layer for intra slices constitutes luma mb mode, luma sub + *modes (if present), mb qp delta, coded block pattern, chroma mb mode and + * luma/chroma residue. These syntax elements are written as directed by table + * 7.3.5 of h264 specification. + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T isvce_write_islice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt); + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for Inter slices + * + * @description + * The mb syntax layer for inter slices constitutes luma mb mode, luma sub + *modes (if present), mb qp delta, coded block pattern, chroma mb mode and + * luma/chroma residue. These syntax elements are written as directed by table + * 7.3.5 of h264 specification + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T isvce_write_pslice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt); + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for B slices + * + * @description + * The mb syntax layer for inter slices constitutes luma mb mode, + * mb qp delta, coded block pattern, chroma mb mode and + * luma/chroma residue. 
These syntax elements are written as directed by table + * 7.3.5 of h264 specification + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T isvce_write_bslice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt); + +#if ENABLE_RE_ENC_AS_SKIP +IH264E_ERROR_T isvce_reencode_as_skip_frame_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt); +#endif + +#endif diff --git a/encoder/svc/isvce_cabac_encode.c b/encoder/svc/isvce_cabac_encode.c new file mode 100644 index 0000000..d31fdd8 --- /dev/null +++ b/encoder/svc/isvce_cabac_encode.c @@ -0,0 +1,2374 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_cabac_encode.c +* +* @brief +* Contains all functions to encode in CABAC entropy mode +* +* +* @author +* Doney Alex +* +* @par List of Functions: +* +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include +#include + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_debug.h" +#include "isvc_defs.h" +#include "isvce_defs.h" +#include "isvc_macros.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_platform_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "isvce_cabac.h" +#include "isvce_encode_header.h" +#include "ih264_cavlc_tables.h" +#include "isvce_cavlc.h" +#include "ih264e_statistics.h" +#include "ih264e_trace.h" +#include "isvce_cabac_utils.h" +#include "isvce_utils.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** + 
******************************************************************************* + * + * @brief + * Encodes mb_skip_flag using CABAC entropy coding mode. + * + * @param[in] u1_mb_skip_flag + * mb_skip_flag + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @param[in] u4_ctxidx_offset + * ctxIdxOffset for mb_skip_flag context + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_enc_mb_skip(UWORD8 u1_mb_skip_flag, isvce_cabac_ctxt_t *ps_cabac_ctxt, + UWORD32 u4_ctxidx_offset) +{ + UWORD8 u4_ctx_inc; + WORD8 a, b; /* a: left MB, b: top MB; 0 when the neighbour's u1_mb_type has CAB_SKIP_MASK set, else 1 */ + a = ((ps_cabac_ctxt->ps_left_ctxt_mb_info->u1_mb_type & CAB_SKIP_MASK) ? 0 : 1); + b = ((ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_mb_type & CAB_SKIP_MASK) ? 0 : 1); + + u4_ctx_inc = a + b; + /* Encode the bin with the context model at u4_ctxidx_offset + u4_ctx_inc */ + isvce_cabac_encode_bin(ps_cabac_ctxt, (UWORD32) u1_mb_skip_flag, + ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctxidx_offset + u4_ctx_inc); +} + +/* ! 
< Table 9-36 – Binarization for macroblock types in I slices in + * ITU_T_H264-201402 Bits 0-7 : binarised value Bits 8-15: length of binary + * sequence + */ +static const UWORD32 u4_mb_type_intra[26] = {0x0100, 0x0620, 0x0621, 0x0622, 0x0623, 0x0748, 0x0749, + 0x074a, 0x074b, 0x074c, 0x074d, 0x074e, 0x074f, 0x0628, + 0x0629, 0x062a, 0x062b, 0x0758, 0x0759, 0x075a, 0x075b, + 0x075c, 0x075d, 0x075e, 0x075f, 0x0203}; + +/* CtxInc for mb types: row 0 is used for I slices, row 1 otherwise; one 4-bit CtxInc per bin */ +static const UWORD32 u4_mb_ctxinc[2][26] = { + /* Intra CtxInc's */ + {0x00, 0x03467, 0x03467, 0x03467, 0x03467, 0x034567, 0x034567, 0x034567, 0x034567, + 0x034567, 0x034567, 0x034567, 0x034567, 0x03467, 0x03467, 0x03467, 0x03467, 0x034567, + 0x034567, 0x034567, 0x034567, 0x034567, 0x034567, 0x034567, 0x034567, 0x00}, + /* Inter CtxInc's */ + {0x00, 0x001233, 0x001233, 0x001233, 0x001233, 0x0012233, 0x0012233, + 0x0012233, 0x0012233, 0x0012233, 0x0012233, 0x0012233, 0x0012233, 0x001233, + 0x001233, 0x001233, 0x001233, 0x0012233, 0x0012233, 0x0012233, 0x0012233, + 0x0012233, 0x0012233, 0x0012233, 0x0012233, 0x00}}; + +/** + ******************************************************************************* + * + * @brief + * Encodes mb_type for an intra MB. 
+ * + * @param[in] u4_slice_type + * slice type + * + * @param[in] u4_intra_mb_type + * MB type (Table 7-11) + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @param[in] u4_ctx_idx_offset + * ctxIdxOffset for mb_type context + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +static void isvce_cabac_enc_intra_mb_type(UWORD32 u4_slice_type, UWORD32 u4_intra_mb_type, + isvce_cabac_ctxt_t *ps_cabac_ctxt, + UWORD32 u4_ctx_idx_offset) +{ + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env); + bin_ctxt_model *pu1_mb_bin_ctxt, *pu1_bin_ctxt; + UWORD8 u1_bin; + isvce_mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info; + isvce_mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + UWORD32 u4_bins; + UWORD32 u4_ctx_inc; + WORD8 i1_bins_len; + UWORD32 u4_code_int_range; + UWORD32 u4_code_int_low; + UWORD16 u2_quant_code_int_range; + UWORD16 u4_code_int_range_lps; + WORD8 i; + UWORD8 u1_ctx_inc; + UWORD32 u4_table_val; + + pu1_mb_bin_ctxt = ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctx_idx_offset; + + u4_bins = u4_mb_type_intra[u4_intra_mb_type]; + i1_bins_len = (WORD8) ((u4_bins >> 8) & 0x0f); + u4_ctx_inc = u4_mb_ctxinc[(u4_slice_type != ISLICE)][u4_intra_mb_type]; + u1_ctx_inc = 0; + if(u4_slice_type == ISLICE) + { + if(ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u1_ctx_inc += ((ps_left_ctxt->u1_mb_type != CAB_I4x4) ? 1 : 0); + if(ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u1_ctx_inc += ((ps_top_ctxt->u1_mb_type != CAB_I4x4) ? 
1 : 0); + + u4_ctx_inc = (u4_ctx_inc | (u1_ctx_inc << ((i1_bins_len - 1) << 2))); + } + else + { + pu1_mb_bin_ctxt += 3; + if(u4_slice_type == BSLICE) pu1_mb_bin_ctxt += 2; + } + + u4_code_int_range = ps_cab_enc_env->u4_code_int_range; + u4_code_int_low = ps_cab_enc_env->u4_code_int_low; + + for(i = (i1_bins_len - 1); i >= 0; i--) + { + WORD32 shift; + + u1_ctx_inc = ((u4_ctx_inc >> (i << 2)) & 0x0f); + u1_bin = ((u4_bins >> i) & 0x01); + /* Encode the bin */ + pu1_bin_ctxt = pu1_mb_bin_ctxt + u1_ctx_inc; + if(i != (i1_bins_len - 2)) + { + WORD8 i1_mps = !!((*pu1_bin_ctxt) & (0x40)); + WORD8 i1_state = (*pu1_bin_ctxt) & 0x3F; + + u2_quant_code_int_range = ((u4_code_int_range >> 6) & 0x03); + u4_table_val = gau4_isvc_cabac_table[i1_state][u2_quant_code_int_range]; + u4_code_int_range_lps = u4_table_val & 0xFF; + + u4_code_int_range -= u4_code_int_range_lps; + if(u1_bin != i1_mps) + { + u4_code_int_low += u4_code_int_range; + u4_code_int_range = u4_code_int_range_lps; + if(i1_state == 0) + { + /* MPS(CtxIdx) = 1 - MPS(CtxIdx) */ + i1_mps = 1 - i1_mps; + } + + i1_state = (u4_table_val >> 15) & 0x3F; + } + else + { + i1_state = (u4_table_val >> 8) & 0x3F; + } + + (*pu1_bin_ctxt) = (i1_mps << 6) | i1_state; + } + else + { + u4_code_int_range -= 2; + } + + /* Renormalize */ + /*****************************************************************/ + /* Renormalization; calculate bits generated based on range(R) */ + /* Note : 6 <= R < 512; R is 2 only for terminating encode */ + /*****************************************************************/ + GETRANGE(shift, u4_code_int_range); + shift = 9 - shift; + u4_code_int_low <<= shift; + u4_code_int_range <<= shift; + + /* bits to be inserted in the bitstream */ + ps_cab_enc_env->u4_bits_gen += shift; + ps_cab_enc_env->u4_code_int_range = u4_code_int_range; + ps_cab_enc_env->u4_code_int_low = u4_code_int_low; + + /* generate stream when a byte is ready */ + if(ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + 
isvce_cabac_put_byte(ps_cabac_ctxt); + u4_code_int_range = ps_cab_enc_env->u4_code_int_range; + u4_code_int_low = ps_cab_enc_env->u4_code_int_low; + } + } +} + +/** + ******************************************************************************* + * + * @brief + * Encodes prev_intra4x4_pred_mode_flag and + * rem_intra4x4_pred_mode using CABAC entropy coding mode + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @param[in] pu1_intra_4x4_modes + * Pointer to array containing prev_intra4x4_pred_mode_flag and + * rem_intra4x4_pred_mode + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_enc_4x4mb_modes(isvce_cabac_ctxt_t *ps_cabac_ctxt, + UWORD8 *pu1_intra_4x4_modes) +{ + WORD32 i; + WORD8 byte; + for(i = 0; i < 16; i += 2) + { + /* sub blk idx 1 */ + byte = pu1_intra_4x4_modes[i >> 1]; + if(byte & 0x1) + { + isvce_cabac_encode_bin( + ps_cabac_ctxt, 1, + ps_cabac_ctxt->au1_cabac_ctxt_table + PREV_INTRA4X4_PRED_MODE_FLAG); + } + else + { + /* Binarization is FL and Cmax=7 */ + isvce_encode_decision_bins( + byte & 0xF, 4, 0x05554, 4, + ps_cabac_ctxt->au1_cabac_ctxt_table + REM_INTRA4X4_PRED_MODE - 5, ps_cabac_ctxt); + } + /* sub blk idx 2 */ + byte >>= 4; + if(byte & 0x1) + { + isvce_cabac_encode_bin( + ps_cabac_ctxt, 1, + ps_cabac_ctxt->au1_cabac_ctxt_table + PREV_INTRA4X4_PRED_MODE_FLAG); + } + else + { + isvce_encode_decision_bins( + byte & 0xF, 4, 0x05554, 4, + ps_cabac_ctxt->au1_cabac_ctxt_table + REM_INTRA4X4_PRED_MODE - 5, ps_cabac_ctxt); + } + } +} + +/** + ******************************************************************************* + * + * @brief + * Encodes chroma intrapred mode for the MB. 
+ * + * @param[in] u1_chroma_pred_mode + * Chroma intr prediction mode + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_enc_chroma_predmode(UWORD8 u1_chroma_pred_mode, + isvce_cabac_ctxt_t *ps_cabac_ctxt) +{ + WORD8 i1_temp; + isvce_mb_info_ctxt_t *ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + isvce_mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info; + isvce_mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + UWORD32 u4_bins = 0; + WORD8 i1_bins_len = 1; + UWORD32 u4_ctx_inc = 0; + UWORD8 a, b; + a = ((ps_left_ctxt->u1_intrapred_chroma_mode != 0) ? 1 : 0); + b = ((ps_top_ctxt->u1_intrapred_chroma_mode != 0) ? 1 : 0); + + /* Binarization is TU and Cmax=3 */ + ps_curr_ctxt->u1_intrapred_chroma_mode = u1_chroma_pred_mode; + + u4_ctx_inc = a + b; + u4_ctx_inc = (u4_ctx_inc | 0x330); + if(u1_chroma_pred_mode) + { + u4_bins = 1; + i1_temp = u1_chroma_pred_mode; + i1_temp--; + /* Put a stream of 1's of length u1_chroma_pred_mode value */ + while(i1_temp) + { + u4_bins = (u4_bins | (1 << i1_bins_len)); + i1_bins_len++; + i1_temp--; + } + /* If u1_chroma_pred_mode < Cmax i.e 3. Terminate put a zero */ + if(u1_chroma_pred_mode < 3) + { + i1_bins_len++; + } + } + + isvce_encode_decision_bins(u4_bins, i1_bins_len, u4_ctx_inc, 3, + ps_cabac_ctxt->au1_cabac_ctxt_table + INTRA_CHROMA_PRED_MODE, + ps_cabac_ctxt); +} + +/** + ******************************************************************************* + * + * @brief + * Encodes CBP for the MB. 
+ * + * @param[in] u4_cbp + * CBP for the MB + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_enc_cbp(UWORD32 u4_cbp, isvce_cabac_ctxt_t *ps_cabac_ctxt) +{ + isvce_mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info; + isvce_mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + WORD8 i2_cbp_chroma, i, j; + UWORD8 u1_ctxt_inc, u1_bin; + UWORD8 a, b; + UWORD32 u4_ctx_inc; + UWORD32 u4_bins; + WORD8 i1_bins_len; + + /* CBP Luma, FL, Cmax = 15, L = 4 */ + u4_ctx_inc = 0; + u4_bins = 0; + i1_bins_len = 5; + for(i = 0; i < 4; i++) + { + /* calculate ctxtInc, depending on neighbour availability */ + /* u1_ctxt_inc = CondTerm(A) + 2 * CondTerm(B); + A: Left block and B: Top block */ + + /* Check for Top availability */ + if(i >> 1) + { + j = i - 2; + /* Top is available always and it's current MB */ + b = (((u4_cbp >> j) & 0x01) != 0 ? 0 : 1); + } + else + { + /* for blocks whose top reference is in another MB */ + { + j = i + 2; + b = ((ps_top_ctxt->u1_cbp >> j) & 0x01) ? 0 : 1; + } + } + + /* Check for Left availability */ + if(i & 0x01) + { + /* Left is available always and it's current MB */ + j = i - 1; + a = (((u4_cbp >> j) & 0x01) != 0 ? 0 : 1); + } + else + { + { + j = i + 1; + a = ((ps_left_ctxt->u1_cbp >> j) & 0x01) ? 0 : 1; + } + } + u1_ctxt_inc = a + 2 * b; + u1_bin = ((u4_cbp >> i) & 0x01); + u4_ctx_inc = (u4_ctx_inc | (u1_ctxt_inc << (i << 2))); + u4_bins = (u4_bins | (u1_bin << i)); + } + + /* CBP Chroma, TU, Cmax = 2 */ + i2_cbp_chroma = u4_cbp >> 4; + /* calculate ctxtInc, depending on neighbour availability */ + a = (ps_left_ctxt->u1_cbp > 15) ? 1 : 0; + b = (ps_top_ctxt->u1_cbp > 15) ? 
1 : 0; + + u1_ctxt_inc = a + 2 * b; + if(i2_cbp_chroma) + { + u4_ctx_inc = u4_ctx_inc | ((4 + u1_ctxt_inc) << 16); + u4_bins = (u4_bins | 0x10); + /* calculate ctxtInc, depending on neighbour availability */ + a = (ps_left_ctxt->u1_cbp > 31) ? 1 : 0; + b = (ps_top_ctxt->u1_cbp > 31) ? 1 : 0; + u1_ctxt_inc = a + 2 * b; + u4_ctx_inc = u4_ctx_inc | ((8 + u1_ctxt_inc) << 20); + u4_bins = (u4_bins | (((i2_cbp_chroma >> 1) & 0x01) << i1_bins_len)); + i1_bins_len++; + } + else + { + u4_ctx_inc = (u4_ctx_inc | ((4 + u1_ctxt_inc) << 16)); + } + isvce_encode_decision_bins(u4_bins, i1_bins_len, u4_ctx_inc, 8, + ps_cabac_ctxt->au1_cabac_ctxt_table + CBP_LUMA, ps_cabac_ctxt); +} + +/** + ******************************************************************************* + * + * @brief + * Encodes mb_qp_delta for the MB. + * + * @param[in] i1_mb_qp_delta + * mb_qp_delta + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_enc_mb_qp_delta(WORD8 i1_mb_qp_delta, isvce_cabac_ctxt_t *ps_cabac_ctxt) +{ + UWORD8 u1_code_num; + UWORD8 u1_ctxt_inc; + + UWORD32 u4_bins; + WORD8 i1_bins_len; + + /* Range of i1_mb_qp_delta= -26 to +25 inclusive */ + ASSERT((i1_mb_qp_delta < 26) && (i1_mb_qp_delta > -27)); + + /* if i1_mb_qp_delta=0, then codeNum=0 */ + u1_code_num = 0; + if(i1_mb_qp_delta > 0) + { + u1_code_num = (i1_mb_qp_delta << 1) - 1; + } + else if(i1_mb_qp_delta < 0) + { + u1_code_num = (ABS(i1_mb_qp_delta)) << 1; + } + + u4_bins = 0; + i1_bins_len = 1; + + u1_ctxt_inc = !!ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt; + + if(u1_code_num == 0) + { + isvce_encode_decision_bins(u4_bins, i1_bins_len, u1_ctxt_inc, 3, + ps_cabac_ctxt->au1_cabac_ctxt_table + MB_QP_DELTA, + ps_cabac_ctxt); + } + else + { + u4_bins = 1; + u1_code_num--; + + if(u1_code_num == 0) + { + i1_bins_len++; + + 
isvce_encode_decision_bins(u4_bins, i1_bins_len, u1_ctxt_inc | 0x20, 3, + ps_cabac_ctxt->au1_cabac_ctxt_table + MB_QP_DELTA, + ps_cabac_ctxt); + } + else + { + u4_bins = (u4_bins | (1 << i1_bins_len)); + i1_bins_len++; + u1_code_num--; + + /* BinIdx from b2 onwards */ + if(u1_code_num < 30) + { + /* maximum i1_bins_len = 31 */ + while(u1_code_num) + { + u4_bins = (u4_bins | (1 << i1_bins_len)); + i1_bins_len++; + u1_code_num--; + }; + + i1_bins_len++; + + isvce_encode_decision_bins(u4_bins, i1_bins_len, u1_ctxt_inc | 0x320, 2, + ps_cabac_ctxt->au1_cabac_ctxt_table + MB_QP_DELTA, + ps_cabac_ctxt); + } + else + { + /* maximum i1_bins_len = 53 */ + u4_bins = 0xffffffff; + i1_bins_len = 32; + u1_code_num -= 30; + + isvce_encode_decision_bins(u4_bins, i1_bins_len, u1_ctxt_inc | 0x320, 2, + ps_cabac_ctxt->au1_cabac_ctxt_table + MB_QP_DELTA, + ps_cabac_ctxt); + + u4_bins = 0; + i1_bins_len = 0; + + while(u1_code_num) + { + u4_bins = (u4_bins | (1 << i1_bins_len)); + i1_bins_len++; + u1_code_num--; + }; + + i1_bins_len++; + + isvce_encode_decision_bins(u4_bins, i1_bins_len, 0x333, 1, + ps_cabac_ctxt->au1_cabac_ctxt_table + MB_QP_DELTA, + ps_cabac_ctxt); + } + } + } +} + +/** + ******************************************************************************* + * @brief + * Encodes 4residual_block_cabac as defined in 7.3.5.3.3. 
+ * + * @param[in] pi2_res_block + * pointer to the array of residues + * + * @param[in] u1_nnz + * Number of non zero coeffs in the block + * + * @param[in] u1_max_num_coeffs + * Max number of coeffs that can be there in the block + * + * @param[in] u2_sig_coeff_map + * Significant coeff map + * + * @param[in] u4_ctx_cat_offset + * ctxIdxOffset for absolute value contexts + * + * @param[in] pu1_ctxt_sig_coeff + * Pointer to residual state variables + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_write_coeff4x4(WORD16 *pi2_res_block, UWORD8 u1_nnz, + UWORD8 u1_max_num_coeffs, UWORD16 u2_sig_coeff_map, + UWORD32 u4_ctx_cat_offset, + bin_ctxt_model *pu1_ctxt_sig_coeff, + isvce_cabac_ctxt_t *ps_cabac_ctxt) +{ + WORD8 i; + WORD16 *pi16_coeffs; + UWORD32 u4_sig_coeff, u4_bins; + UWORD32 u4_ctx_inc; + UWORD8 u1_last_sig_coef_index = (31 - CLZ(u2_sig_coeff_map)); + + /* Always put Coded Block Flag as 1 */ + + pi16_coeffs = pi2_res_block; + { + bin_ctxt_model *pu1_bin_ctxt; + UWORD8 u1_bin, uc_last; + + i = 0; + pu1_bin_ctxt = pu1_ctxt_sig_coeff; + u4_sig_coeff = 0; + u1_bin = 1; + if((u1_last_sig_coef_index)) + { + u1_bin = !!(u2_sig_coeff_map & 01); + } + uc_last = 1; + + do + { + /* Encode Decision */ + isvce_cabac_encode_bin(ps_cabac_ctxt, u1_bin, pu1_bin_ctxt); + + if(u1_bin & uc_last) + { + u4_sig_coeff = (u4_sig_coeff | (1 << i)); + pu1_bin_ctxt = pu1_ctxt_sig_coeff + i + LAST_SIGNIFICANT_COEFF_FLAG_FRAME - + SIGNIFICANT_COEFF_FLAG_FRAME; + u1_bin = (i == u1_last_sig_coef_index); + uc_last = 0; + } + else + { + i = i + 1; + pu1_bin_ctxt = pu1_ctxt_sig_coeff + i; + u1_bin = (i == u1_last_sig_coef_index); + uc_last = 1; + if((i != u1_last_sig_coef_index)) + { + u1_bin = !!((u2_sig_coeff_map >> i) & 01); + } + } + } while(!((i > u1_last_sig_coef_index) || (i > (u1_max_num_coeffs - 1)))); + 
} + + /* Encode coeff_abs_level_minus1 and coeff_sign_flag */ + { + UWORD8 u1_sign; + UWORD16 u2_abs_level; + UWORD8 u1_abs_level_equal1 = 1, u1_abs_level_gt1 = 0; + UWORD8 u1_ctx_inc; + UWORD8 u1_coff; + WORD16 i2_sufs; + WORD8 i1_bins_len; + i = u1_last_sig_coef_index; + pi16_coeffs = pi2_res_block + u1_nnz - 1; + do + { + { + u4_sig_coeff = u4_sig_coeff & ((1 << i) - 1); + u4_bins = 0; + u4_ctx_inc = 0; + i1_bins_len = 1; + /* Encode the AbsLevelMinus1 */ + u2_abs_level = ABS(*(pi16_coeffs)) - 1; + /* CtxInc for bin0 */ + u4_ctx_inc = MIN(u1_abs_level_equal1, 4); + /* CtxInc for remaining */ + u1_ctx_inc = 5 + MIN(u1_abs_level_gt1, 4); + u4_ctx_inc = u4_ctx_inc + (u1_ctx_inc << 4); + if(u2_abs_level) + { + u1_abs_level_gt1++; + u1_abs_level_equal1 = 0; + } + if(!u1_abs_level_gt1) u1_abs_level_equal1++; + + u1_coff = 14; + if(u2_abs_level >= u1_coff) + { + /* Prefix TU i.e string of 14 1's */ + u4_bins = 0x3fff; + i1_bins_len = 14; + isvce_encode_decision_bins( + u4_bins, i1_bins_len, u4_ctx_inc, 1, + ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctx_cat_offset, ps_cabac_ctxt); + + /* Suffix, uses EncodeBypass */ + i2_sufs = u2_abs_level - u1_coff; + + u4_bins = isvce_cabac_UEGk0_binarization(i2_sufs, &i1_bins_len); + + isvce_cabac_encode_bypass_bins(ps_cabac_ctxt, u4_bins, i1_bins_len); + } + else + { + /* Prefix only */ + u4_bins = (1 << u2_abs_level) - 1; + i1_bins_len = u2_abs_level + 1; + /* Encode Terminating bit */ + isvce_encode_decision_bins( + u4_bins, i1_bins_len, u4_ctx_inc, 1, + ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctx_cat_offset, ps_cabac_ctxt); + } + } + /* encode coeff_sign_flag[i] */ + u1_sign = ((*pi16_coeffs) < 0) ? 
1 : 0; + isvce_cabac_encode_bypass_bin(ps_cabac_ctxt, u1_sign); + i = CLZ(u4_sig_coeff); + i = 31 - i; + pi16_coeffs--; + } while(u4_sig_coeff); + } +} + +/** + ******************************************************************************* + * @brief + * Write DC coeffs for intra predicted luma block + * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_encode_residue_luma_dc(isvce_entropy_ctxt_t *ps_ent_ctxt) +{ + /* CABAC context */ + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + tu_sblk_coeff_data_t *ps_mb_coeff_data; + + /* packed residue */ + void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data; + UWORD16 u2_sig_coeff_map; + WORD16 *pi2_res_block; + UWORD8 u1_nnz; + UWORD8 u1_cbf; + isvce_mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + isvce_mb_info_ctxt_t *p_CurCtxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u1_nnz, u2_sig_coeff_map, + pi2_res_block); + + u1_cbf = !!(u1_nnz); + + { + UWORD32 u4_ctx_inc; + UWORD8 u1_a, u1_b; + + u1_a = ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] & 0x1; + u1_b = ps_top_ctxt->u1_yuv_dc_csbp & 0x1; + u4_ctx_inc = u1_a + (u1_b << 1); + + isvce_cabac_encode_bin( + ps_cabac_ctxt, u1_cbf, + ps_cabac_ctxt->au1_cabac_ctxt_table + CBF + (LUMA_DC_CTXCAT << 2) + u4_ctx_inc); + } + + /* Write coded_block_flag */ + if(u1_cbf) + { + isvce_cabac_write_coeff4x4(pi2_res_block, u1_nnz, 15, u2_sig_coeff_map, + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_0_OFFSET, + ps_cabac_ctxt->au1_cabac_ctxt_table + + SIGNIFICANT_COEFF_FLAG_FRAME + SIG_COEFF_CTXT_CAT_0_OFFSET, + ps_cabac_ctxt); + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] |= 0x1; + p_CurCtxt->u1_yuv_dc_csbp |= 0x1; + } + else + { + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + p_CurCtxt->u1_yuv_dc_csbp &= 0x6; + } + + 
ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data; +} + +/** + ******************************************************************************* + * @brief + * Write chroma residues to the bitstream + * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure + * + * @param[in] u1_chroma_cbp + * coded block pattern, chroma + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_write_chroma_residue(isvce_entropy_ctxt_t *ps_ent_ctxt, + UWORD8 u1_chroma_cbp) +{ + /* CABAC context */ + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + tu_sblk_coeff_data_t *ps_mb_coeff_data; + /* packed residue */ + void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data; + UWORD16 u2_sig_coeff_map; + UWORD8 u1_nnz; + isvce_mb_info_ctxt_t *ps_top_ctxt_mb_info, *ps_curr_ctxt; + + ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_top_ctxt_mb_info; + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + /********************/ + /* Write Chroma DC */ + /********************/ + { + WORD16 *pi2_res_block; + UWORD8 u1_left_dc_csbp, u1_top_dc_csbp, u1_uv, u1_cbf; + + u1_left_dc_csbp = (ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0]) >> 1; + u1_top_dc_csbp = (ps_top_ctxt_mb_info->u1_yuv_dc_csbp) >> 1; + + for(u1_uv = 0; u1_uv < 2; u1_uv++) + { + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u1_nnz, u2_sig_coeff_map, + pi2_res_block); + u1_cbf = !!(u1_nnz); + { + UWORD8 u1_a, u1_b; + UWORD32 u4_ctx_inc; + u1_a = (u1_left_dc_csbp >> u1_uv) & 0x01; + u1_b = (u1_top_dc_csbp >> u1_uv) & 0x01; + u4_ctx_inc = (u1_a + (u1_b << 1)); + + isvce_cabac_encode_bin(ps_cabac_ctxt, u1_cbf, + ps_cabac_ctxt->au1_cabac_ctxt_table + CBF + + (CHROMA_DC_CTXCAT << 2) + u4_ctx_inc); + } + + if(u1_cbf) + { + isvce_cabac_write_coeff4x4(pi2_res_block, u1_nnz, 3, u2_sig_coeff_map, + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_3_OFFSET, + ps_cabac_ctxt->au1_cabac_ctxt_table + + 
SIGNIFICANT_COEFF_FLAG_FRAME + + SIG_COEFF_CTXT_CAT_3_OFFSET, + ps_cabac_ctxt); + + SETBIT(u1_top_dc_csbp, u1_uv); + SETBIT(u1_left_dc_csbp, u1_uv); + } + else + { + CLEARBIT(u1_top_dc_csbp, u1_uv); + CLEARBIT(u1_left_dc_csbp, u1_uv); + } + } + /*************************************************************/ + /* Update the DC csbp */ + /*************************************************************/ + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x1; + ps_curr_ctxt->u1_yuv_dc_csbp &= 0x1; + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] |= (u1_left_dc_csbp << 1); + ps_curr_ctxt->u1_yuv_dc_csbp |= (u1_top_dc_csbp << 1); + } + /*******************/ + /* Write Chroma AC */ + /*******************/ + { + if(u1_chroma_cbp == 2) + { + UWORD8 u1_uv_blkno, u1_left_ac_csbp, u1_top_ac_csbp; + WORD16 *pi2_res_block; + u1_left_ac_csbp = ps_cabac_ctxt->pu1_left_uv_ac_csbp[0]; + u1_top_ac_csbp = ps_top_ctxt_mb_info->u1_yuv_ac_csbp >> 4; + + for(u1_uv_blkno = 0; u1_uv_blkno < 8; u1_uv_blkno++) + { + UWORD8 u1_cbf; + UWORD8 u1_b2b0, u1_b2b1; + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u1_nnz, + u2_sig_coeff_map, pi2_res_block); + + u1_cbf = !!(u1_nnz); + u1_b2b0 = ((u1_uv_blkno & 0x4) >> 1) | (u1_uv_blkno & 0x1); + u1_b2b1 = ((u1_uv_blkno & 0x4) >> 1) | ((u1_uv_blkno & 0x2) >> 1); + + { + UWORD8 u1_a, u1_b; + UWORD32 u4_ctx_inc; + /* write coded_block_flag */ + u1_a = (u1_left_ac_csbp >> u1_b2b1) & 0x1; + u1_b = (u1_top_ac_csbp >> u1_b2b0) & 0x1; + u4_ctx_inc = u1_a + (u1_b << 1); + + isvce_cabac_encode_bin(ps_cabac_ctxt, u1_cbf, + ps_cabac_ctxt->au1_cabac_ctxt_table + CBF + + (CHROMA_AC_CTXCAT << 2) + u4_ctx_inc); + } + if(u1_cbf) + { + isvce_cabac_write_coeff4x4( + pi2_res_block, u1_nnz, 14, u2_sig_coeff_map, + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_4_OFFSET, + ps_cabac_ctxt->au1_cabac_ctxt_table + +SIGNIFICANT_COEFF_FLAG_FRAME + + SIG_COEFF_CTXT_CAT_4_OFFSET, + ps_cabac_ctxt); + + SETBIT(u1_left_ac_csbp, u1_b2b1); + SETBIT(u1_top_ac_csbp, u1_b2b0); + } + else + { 
+ CLEARBIT(u1_left_ac_csbp, u1_b2b1); + CLEARBIT(u1_top_ac_csbp, u1_b2b0); + } + } + /*************************************************************/ + /* Update the AC csbp */ + /*************************************************************/ + ps_cabac_ctxt->pu1_left_uv_ac_csbp[0] = u1_left_ac_csbp; + ps_curr_ctxt->u1_yuv_ac_csbp &= 0x0f; + ps_curr_ctxt->u1_yuv_ac_csbp |= (u1_top_ac_csbp << 4); + } + else + { + ps_cabac_ctxt->pu1_left_uv_ac_csbp[0] = 0; + ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf; + } + } + ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data; +} + +/** + ******************************************************************************* + * @brief + * Encodes Residues for the MB as defined in 7.3.5.3 + * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure + * + * @param[in] u1_cbp + * coded block pattern + * + * @param[in] u1_ctx_cat + * Context category, LUMA_AC_CTXCAT or LUMA_4x4_CTXCAT + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_encode_residue(isvce_entropy_ctxt_t *ps_ent_ctxt, UWORD32 u4_cbp, + UWORD8 u1_ctx_cat) +{ + /* CABAC context */ + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + + tu_sblk_coeff_data_t *ps_mb_coeff_data; + /* packed residue */ + void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data; + UWORD16 u2_sig_coeff_map; + UWORD8 u1_nnz; + isvce_mb_info_ctxt_t *ps_curr_ctxt; + isvce_mb_info_ctxt_t *ps_top_ctxt; + UWORD8 u1_left_ac_csbp; + UWORD8 u1_top_ac_csbp; + UWORD32 u4_ctx_idx_offset_sig_coef, u4_ctx_idx_offset_abs_lvl; + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + u1_left_ac_csbp = ps_cabac_ctxt->pu1_left_y_ac_csbp[0]; + u1_top_ac_csbp = ps_top_ctxt->u1_yuv_ac_csbp; + + if(u4_cbp & 0xf) + { + /* Write luma residue */ + UWORD8 u1_offset; + WORD16 *pi2_res_block; + UWORD8 u1_subblk_num; + if(u1_ctx_cat == LUMA_AC_CTXCAT) + { + u1_offset 
= 1; + u4_ctx_idx_offset_sig_coef = SIG_COEFF_CTXT_CAT_1_OFFSET; + u4_ctx_idx_offset_abs_lvl = COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_1_OFFSET; + } + else + { + u1_offset = 0; + u4_ctx_idx_offset_sig_coef = SIG_COEFF_CTXT_CAT_2_OFFSET; + u4_ctx_idx_offset_abs_lvl = COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_2_OFFSET; + } + + for(u1_subblk_num = 0; u1_subblk_num < 16; u1_subblk_num++) + { + UWORD8 u1_b0, u1_b1, u1_b2, u1_b3, u1_b2b0, u1_b3b1, u1_b3b2; + u1_b0 = (u1_subblk_num & 0x1); + u1_b1 = (u1_subblk_num & 0x2) >> 1; + u1_b2 = (u1_subblk_num & 0x4) >> 2; + u1_b3 = (u1_subblk_num & 0x8) >> 3; + u1_b2b0 = (u1_b2 << 1) | (u1_b0); + u1_b3b1 = (u1_b3 << 1) | (u1_b1); + u1_b3b2 = (u1_b3 << 1) | (u1_b2); + + if(!((u4_cbp >> u1_b3b2) & 0x1)) + { + /* ---------------------------------------------------------- */ + /* The current block is not coded so skip all the sub block */ + /* and set the pointer of scan level, csbp accrodingly */ + /* ---------------------------------------------------------- */ + CLEARBIT(u1_top_ac_csbp, u1_b2b0); + CLEARBIT(u1_top_ac_csbp, (u1_b2b0 + 1)); + CLEARBIT(u1_left_ac_csbp, u1_b3b1); + CLEARBIT(u1_left_ac_csbp, (u1_b3b1 + 1)); + + u1_subblk_num += 3; + } + else + { + UWORD8 u1_csbf; + + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u1_nnz, + u2_sig_coeff_map, pi2_res_block); + + u1_csbf = !!(u1_nnz); + { + UWORD8 u1_a, u1_b; + UWORD32 u4_ctx_inc; + u1_b = (u1_top_ac_csbp >> u1_b2b0) & 0x01; + u1_a = (u1_left_ac_csbp >> u1_b3b1) & 0x01; + u4_ctx_inc = u1_a + (u1_b << 1); + + /* Encode the bin */ + isvce_cabac_encode_bin( + ps_cabac_ctxt, u1_csbf, + ps_cabac_ctxt->au1_cabac_ctxt_table + CBF + (u1_ctx_cat << 2) + u4_ctx_inc); + } + /**************************/ + /* Write coded_block_flag */ + /**************************/ + if(u1_csbf) + { + isvce_cabac_write_coeff4x4(pi2_res_block, u1_nnz, (UWORD8) (15 - u1_offset), + u2_sig_coeff_map, u4_ctx_idx_offset_abs_lvl, + ps_cabac_ctxt->au1_cabac_ctxt_table + + 
SIGNIFICANT_COEFF_FLAG_FRAME + + u4_ctx_idx_offset_sig_coef, + ps_cabac_ctxt); + + SETBIT(u1_top_ac_csbp, u1_b2b0); + SETBIT(u1_left_ac_csbp, u1_b3b1); + } + else + { + CLEARBIT(u1_top_ac_csbp, u1_b2b0); + CLEARBIT(u1_left_ac_csbp, u1_b3b1); + } + } + } + /**************************************************************************/ + /* Update the AC csbp */ + /**************************************************************************/ + ps_cabac_ctxt->pu1_left_y_ac_csbp[0] = u1_left_ac_csbp & 0xf; + u1_top_ac_csbp &= 0x0f; + ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf0; + ps_curr_ctxt->u1_yuv_ac_csbp |= u1_top_ac_csbp; + } + else + { + ps_cabac_ctxt->pu1_left_y_ac_csbp[0] = 0; + ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf0; + } + + /* Write chroma residue */ + + ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data; + { + UWORD8 u1_cbp_chroma; + u1_cbp_chroma = u4_cbp >> 4; + if(u1_cbp_chroma) + { + isvce_cabac_write_chroma_residue(ps_ent_ctxt, u1_cbp_chroma); + } + else + { + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x1; + ps_curr_ctxt->u1_yuv_dc_csbp &= 0x1; + ps_cabac_ctxt->pu1_left_uv_ac_csbp[0] = 0; + ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf; + } + } +} + +/** + ******************************************************************************* + * @brief + * Encodes a Motion vector (9.3.3.1.1.7 ) + * + * @param[in] u1_mvd + * Motion vector to be encoded + * + * @param[in] u4_ctx_idx_offset + * * ctxIdxOffset for MV_X or MV_Ycontext + * + * @param[in] ui2_abs_mvd + * sum of absolute value of corresponding neighboring motion vectors + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_enc_ctx_mvd(WORD16 u1_mvd, UWORD32 u4_ctx_idx_offset, UWORD16 ui2_abs_mvd, + isvce_cabac_ctxt_t *ps_cabac_ctxt) +{ + UWORD8 u1_bin, u1_ctxt_inc; + WORD8 k = 3, u1_coff = 9; + WORD16 i2_abs_mvd, i2_sufs; + UWORD32 u4_ctx_inc; + UWORD32 
u4_bins; + WORD8 i1_bins_len; + + /* if mvd < u1_coff + only Prefix + else + Prefix + Suffix + + encode sign bit + + Prefix TU encoding Cmax =u1_coff and Suffix 3rd order Exp-Golomb + */ + + if(ui2_abs_mvd < 3) + u4_ctx_inc = 0; + else if(ui2_abs_mvd > 32) + u4_ctx_inc = 2; + else + u4_ctx_inc = 1; + + u4_bins = 0; + i1_bins_len = 1; + + if(u1_mvd == 0) + { + isvce_cabac_encode_bin( + ps_cabac_ctxt, 0, ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctx_idx_offset + u4_ctx_inc); + } + else + { + i2_abs_mvd = ABS(u1_mvd); + if(i2_abs_mvd >= u1_coff) + { + /* Prefix TU i.e string of 9 1's */ + u4_bins = 0x1ff; + i1_bins_len = 9; + u4_ctx_inc = (u4_ctx_inc | 0x065430); + + isvce_encode_decision_bins(u4_bins, i1_bins_len, u4_ctx_inc, 4, + ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctx_idx_offset, + ps_cabac_ctxt); + + /* Suffix, uses EncodeBypass */ + u4_bins = 0; + i1_bins_len = 0; + i2_sufs = i2_abs_mvd - u1_coff; + while(1) + { + if(i2_sufs >= (1 << k)) + { + u4_bins = (u4_bins | (1 << (31 - i1_bins_len))); + i1_bins_len++; + i2_sufs = i2_sufs - (1 << k); + k++; + } + else + { + i1_bins_len++; + while(k--) + { + u1_bin = ((i2_sufs >> k) & 0x01); + u4_bins = (u4_bins | (u1_bin << (31 - i1_bins_len))); + i1_bins_len++; + } + break; + } + } + u4_bins >>= (32 - i1_bins_len); + isvce_cabac_encode_bypass_bins(ps_cabac_ctxt, u4_bins, i1_bins_len); + } + else + { + /* Prefix only */ + /* b0 */ + u4_bins = 1; + i2_abs_mvd--; + u1_ctxt_inc = 3; + while(i2_abs_mvd) + { + i2_abs_mvd--; + u4_bins = (u4_bins | (1 << i1_bins_len)); + if(u1_ctxt_inc <= 6) + { + u4_ctx_inc = (u4_ctx_inc | (u1_ctxt_inc << (i1_bins_len << 2))); + u1_ctxt_inc++; + } + i1_bins_len++; + } + /* Encode Terminating bit */ + if(i1_bins_len <= 4) u4_ctx_inc = (u4_ctx_inc | (u1_ctxt_inc << (i1_bins_len << 2))); + i1_bins_len++; + isvce_encode_decision_bins(u4_bins, i1_bins_len, u4_ctx_inc, 4, + ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctx_idx_offset, + ps_cabac_ctxt); + } + /* sign bit, uses EncodeBypass */ + if(u1_mvd 
> 0) + isvce_cabac_encode_bypass_bin(ps_cabac_ctxt, 0); + else + isvce_cabac_encode_bypass_bin(ps_cabac_ctxt, 1); + } +} + +/** + ******************************************************************************* + * @brief + * Encodes all motion vectors for a P16x16 MB + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @param[in] pi2_mv_ptr + * Pointer to array of motion vectors + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_enc_mvds_p16x16(isvce_cabac_ctxt_t *ps_cabac_ctxt, WORD16 *pi2_mv_ptr) +{ + UWORD8 u1_abs_mvd_x, u1_abs_mvd_y; + UWORD8 *pu1_top_mv_ctxt, *pu1_lft_mv_ctxt; + WORD16 u2_mv; + u1_abs_mvd_x = 0; + u1_abs_mvd_y = 0; + pu1_top_mv_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv[0]; + pu1_lft_mv_ctxt = ps_cabac_ctxt->pu1_left_mv_ctxt_inc[0]; + { + UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a, u2_abs_mvd_y_b; + u2_abs_mvd_x_b = (UWORD16) pu1_top_mv_ctxt[0]; + u2_abs_mvd_y_b = (UWORD16) pu1_top_mv_ctxt[1]; + u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[0]; + u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[1]; + u2_mv = *(pi2_mv_ptr++); + + isvce_cabac_enc_ctx_mvd(u2_mv, MVD_X, (UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b), + ps_cabac_ctxt); + + u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv)); + u2_mv = *(pi2_mv_ptr++); + + isvce_cabac_enc_ctx_mvd(u2_mv, MVD_Y, (UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b), + ps_cabac_ctxt); + + u1_abs_mvd_y = CLIP3(0, 127, ABS(u2_mv)); + } + /***************************************************************/ + /* Store abs_mvd_values cabac contexts */ + /***************************************************************/ + pu1_top_mv_ctxt[0] = pu1_lft_mv_ctxt[0] = u1_abs_mvd_x; + pu1_top_mv_ctxt[1] = pu1_lft_mv_ctxt[1] = u1_abs_mvd_y; +} + +/** + ******************************************************************************* + * @brief + * Encodes all motion vectors for a B MB (Assues that 
mbype is B_L0_16x16, + *B_L1_16x16 or B_Bi_16x16 + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @param[in] pi2_mv_ptr + * Pointer to array of motion vectors + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_cabac_enc_mvds_b16x16(isvce_cabac_ctxt_t *ps_cabac_ctxt, WORD16 *pi2_mv_ptr, + WORD32 i4_mb_part_pred_mode) +{ + /* Encode the differential component of the motion vectors */ + + { + UWORD8 u1_abs_mvd_x, u1_abs_mvd_y; + UWORD8 *pu1_top_mv_ctxt, *pu1_lft_mv_ctxt; + WORD16 u2_mv; + u1_abs_mvd_x = 0; + u1_abs_mvd_y = 0; + pu1_top_mv_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv[0]; + pu1_lft_mv_ctxt = ps_cabac_ctxt->pu1_left_mv_ctxt_inc[0]; + if(i4_mb_part_pred_mode != L1) + { + UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a, u2_abs_mvd_y_b; + u2_abs_mvd_x_b = (UWORD16) pu1_top_mv_ctxt[0]; + u2_abs_mvd_y_b = (UWORD16) pu1_top_mv_ctxt[1]; + u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[0]; + u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[1]; + u2_mv = pi2_mv_ptr[0]; + + isvce_cabac_enc_ctx_mvd(u2_mv, MVD_X, (UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b), + ps_cabac_ctxt); + + u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv)); + u2_mv = pi2_mv_ptr[1]; + + isvce_cabac_enc_ctx_mvd(u2_mv, MVD_Y, (UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b), + ps_cabac_ctxt); + + u1_abs_mvd_y = CLIP3(0, 127, ABS(u2_mv)); + } + + /***************************************************************/ + /* Store abs_mvd_values cabac contexts */ + /***************************************************************/ + pu1_top_mv_ctxt[0] = pu1_lft_mv_ctxt[0] = u1_abs_mvd_x; + pu1_top_mv_ctxt[1] = pu1_lft_mv_ctxt[1] = u1_abs_mvd_y; + + u1_abs_mvd_x = 0; + u1_abs_mvd_y = 0; + if(i4_mb_part_pred_mode != L0) + { + UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a, u2_abs_mvd_y_b; + u2_abs_mvd_x_b = (UWORD16) pu1_top_mv_ctxt[2]; + u2_abs_mvd_y_b = (UWORD16) 
pu1_top_mv_ctxt[3]; + u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[2]; + u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[3]; + u2_mv = pi2_mv_ptr[2]; + + isvce_cabac_enc_ctx_mvd(u2_mv, MVD_X, (UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b), + ps_cabac_ctxt); + + u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv)); + u2_mv = pi2_mv_ptr[3]; + + isvce_cabac_enc_ctx_mvd(u2_mv, MVD_Y, (UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b), + ps_cabac_ctxt); + + u1_abs_mvd_y = CLIP3(0, 127, ABS(u2_mv)); + } + /***************************************************************/ + /* Store abs_mvd_values cabac contexts */ + /***************************************************************/ + pu1_top_mv_ctxt[2] = pu1_lft_mv_ctxt[2] = u1_abs_mvd_x; + pu1_top_mv_ctxt[3] = pu1_lft_mv_ctxt[3] = u1_abs_mvd_y; + } +} + +static FORCEINLINE void isvce_mb_ctxt_update(isvce_cabac_ctxt_t *ps_cabac_ctxt, + isvce_mb_info_ctxt_t *ps_curr_ctxt, + WORD8 i1_mb_qp_delta, UWORD8 u1_cbp, + UWORD8 u1_base_mode_flag, MBTYPES_T e_mb_type) +{ + UWORD8 u1_is_intra_mb = (e_mb_type == I16x16) || (e_mb_type == I8x8) || (e_mb_type == I4x4); + UWORD8 u1_is_skip_mb = (e_mb_type == PSKIP) || (e_mb_type == BSKIP); + UWORD8 u1_is_direct_mb = (e_mb_type == BDIRECT); + + ps_curr_ctxt->u1_cbp = u1_cbp; + ps_curr_ctxt->u1_base_mode_flag = u1_base_mode_flag; + + if(u1_is_intra_mb || u1_is_skip_mb || u1_is_direct_mb || u1_base_mode_flag) + { + memset(ps_curr_ctxt->u1_mv, 0, 16); + memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16); + } + + if((0 == u1_cbp) && (e_mb_type != I16x16)) + { + ps_curr_ctxt->u1_yuv_ac_csbp = 0; + ps_curr_ctxt->u1_yuv_dc_csbp = 0; + + ps_cabac_ctxt->pu1_left_uv_ac_csbp[0] = 0; + ps_cabac_ctxt->pu1_left_y_ac_csbp[0] = 0; + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] = 0; + } + + if(u1_is_skip_mb) + { + ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt = 0; + } + else if((I16x16 != e_mb_type) && (0 == u1_cbp)) + { + ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt = 0; + } + else if(0 == i1_mb_qp_delta) + { + 
ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt = 0; + } + else + { + ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt = 1; + } + + if(!u1_is_intra_mb || u1_base_mode_flag) + { + ps_curr_ctxt->u1_intrapred_chroma_mode = 0; + } +} + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for an Intra Slice. + * + * @description + * The mb syntax layer for intra slices constitutes luma mb mode, mb qp delta, + *coded block pattern, chroma mb mode and luma/chroma residue. These syntax + *elements are written as directed by table 7.3.5 of h264 specification. + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T isvce_write_islice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt) +{ + isvce_mb_info_ctxt_t *ps_curr_ctxt; + + WORD32 mb_tpm, mb_type, chroma_intra_mode, luma_intra_mode; + UWORD8 u1_cbp, u1_cbp_l, u1_cbp_c; + WORD8 mb_qp_delta; + WORD32 bitstream_start_offset, bitstream_end_offset; + UWORD8 u1_base_mode_flag; + + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + svc_slice_header_t *ps_svc_slice_header = + ps_ent_ctxt->ps_svc_slice_hdr_base + + (ps_ent_ctxt->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT); + isvce_mb_hdr_common_t *ps_mb_hdr = (isvce_mb_hdr_common_t *) ps_ent_ctxt->pv_mb_header_data; + + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + + if((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB) >= ps_bitstream->u4_max_strm_size) + { + /* return without corrupting the buffer beyond its size */ + return (IH264E_BITSTREAM_BUFFER_OVERFLOW); + } + + mb_tpm = ps_mb_hdr->u1_mb_type_mode; + u1_base_mode_flag = ps_mb_hdr->u1_base_mode_flag; + u1_cbp = ps_mb_hdr->u1_cbp; + u1_cbp_c = (u1_cbp >> 4); + u1_cbp_l = (u1_cbp & 0xF); + mb_type = mb_tpm & 0xF; + 
+ isvce_get_cabac_context(ps_ent_ctxt, mb_type); + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + bitstream_start_offset = isvce_get_num_bits(ps_bitstream); + + if(mb_type == I16x16) + { + luma_intra_mode = ((mb_tpm >> 4) & 3) + 1 + (u1_cbp_c << 2) + (u1_cbp_l == 15) * 12; + } + else + { + luma_intra_mode = 0; + } + + chroma_intra_mode = (mb_tpm >> 6); + + if(ps_ent_ctxt->u1_spatial_layer_id && ps_svc_slice_header->i1_adaptive_base_mode_flag) + { + isvce_cabac_enc_base_mode_flag(ps_cabac_ctxt, u1_base_mode_flag); + } + + if(!u1_base_mode_flag) + { + isvce_cabac_enc_intra_mb_type(ISLICE, luma_intra_mode, ps_cabac_ctxt, MB_TYPE_I_SLICE); + + if(mb_type == I4x4) + { + isvce_mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = + (isvce_mb_hdr_i4x4_t *) ps_ent_ctxt->pv_mb_header_data; + + isvce_cabac_enc_4x4mb_modes(ps_cabac_ctxt, ps_mb_hdr_i4x4->au1_sub_blk_modes); + } + + isvce_cabac_enc_chroma_predmode(chroma_intra_mode, ps_cabac_ctxt); + } + + if(u1_base_mode_flag || (mb_type != I16x16)) + { + isvce_cabac_enc_cbp(u1_cbp, ps_cabac_ctxt); + } + + if((u1_cbp > 0) || (mb_type == I16x16)) + { + mb_qp_delta = + ((WORD16) ps_mb_hdr->u1_mb_qp) - ((WORD16) ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp); + + isvce_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt); + ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp = ps_mb_hdr->u1_mb_qp; + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset - bitstream_start_offset; + bitstream_start_offset = bitstream_end_offset; + + if(mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + + isvce_cabac_encode_residue_luma_dc(ps_ent_ctxt); + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_AC_CTXCAT); + + pu1_byte += sizeof(isvce_mb_hdr_i16x16_t); + } + else if(mb_type == I4x4) + { + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_4X4_CTXCAT); + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + 
ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + + pu1_byte += sizeof(isvce_mb_hdr_i4x4_t); + } + else if(mb_type == BASE_MODE) + { + ps_curr_ctxt->u1_mb_type = CAB_P | CAB_NON_BD16x16; + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_4X4_CTXCAT); + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + + pu1_byte += sizeof(isvce_mb_hdr_base_mode_t); + } + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset - bitstream_start_offset; + } + else + { + mb_qp_delta = 0; + + if(mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + + pu1_byte += sizeof(isvce_mb_hdr_i16x16_t); + } + else if(mb_type == I4x4) + { + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + + pu1_byte += sizeof(isvce_mb_hdr_i4x4_t); + } + else if(mb_type == BASE_MODE) + { + ps_curr_ctxt->u1_mb_type = CAB_P | CAB_NON_BD16x16; + + pu1_byte += sizeof(isvce_mb_hdr_base_mode_t); + } + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset - bitstream_start_offset; + } + + isvce_mb_ctxt_update(ps_cabac_ctxt, ps_curr_ctxt, mb_qp_delta, u1_cbp, u1_base_mode_flag, + mb_type); + + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + return IH264E_SUCCESS; +} + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for Inter slices + * + * @description + * The mb syntax layer for inter slices constitutes luma mb mode, mb qp delta, + *coded block pattern, chroma mb mode and luma/chroma residue. 
These syntax + *elements are written as directed by table 7.3.5 of h264 specification + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T isvce_write_pslice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt) +{ + isvce_mb_info_ctxt_t *ps_curr_ctxt; + + WORD32 mb_tpm, mb_type, chroma_intra_mode, luma_intra_mode; + UWORD8 u1_cbp, u1_cbp_l, u1_cbp_c; + WORD8 mb_qp_delta; + WORD32 bitstream_start_offset, bitstream_end_offset; + UWORD8 u1_base_mode_flag; + UWORD8 u1_is_intra_mb; + + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + svc_slice_header_t *ps_svc_slice_header = + ps_ent_ctxt->ps_svc_slice_hdr_base + + (ps_ent_ctxt->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT); + isvce_mb_hdr_common_t *ps_mb_hdr = (isvce_mb_hdr_common_t *) ps_ent_ctxt->pv_mb_header_data; + + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + + if((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB) >= ps_bitstream->u4_max_strm_size) + { + /* return without corrupting the buffer beyond its size */ + return IH264E_BITSTREAM_BUFFER_OVERFLOW; + } + + /* mb header info */ + mb_tpm = ps_mb_hdr->u1_mb_type_mode; + u1_base_mode_flag = ps_mb_hdr->u1_base_mode_flag; + u1_cbp = ps_mb_hdr->u1_cbp; + u1_cbp_c = (u1_cbp >> 4); + u1_cbp_l = (u1_cbp & 0xF); + + /* mb type */ + mb_type = mb_tpm & 0xF; + u1_is_intra_mb = (mb_type == I16x16) || (mb_type == I8x8) || (mb_type == I4x4); + + /* CABAC contexts for the MB */ + isvce_get_cabac_context(ps_ent_ctxt, mb_type); + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = isvce_get_num_bits(ps_bitstream); + + /* Encode mb_skip_flag */ + isvce_cabac_enc_mb_skip(mb_type == PSKIP, ps_cabac_ctxt, MB_SKIP_FLAG_P_SLICE); + + if(mb_type == PSKIP) + { + 
ps_curr_ctxt->u1_mb_type = CAB_P_SKIP; + + ps_ent_ctxt->pi4_mb_skip_run[0]++; + + isvce_mb_ctxt_update(ps_cabac_ctxt, ps_curr_ctxt, 0, 0, 0, PSKIP); + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_header_bits[!u1_is_intra_mb] += + bitstream_end_offset - bitstream_start_offset; + + pu1_byte += sizeof(isvce_mb_hdr_pskip_t); + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + return IH264E_SUCCESS; + } + + if(ps_ent_ctxt->u1_spatial_layer_id && ps_svc_slice_header->i1_adaptive_base_mode_flag) + { + isvce_cabac_enc_base_mode_flag(ps_cabac_ctxt, u1_base_mode_flag); + } + + if(!u1_base_mode_flag) + { + if(u1_is_intra_mb) + { + if(mb_type == I16x16) + { + luma_intra_mode = ((mb_tpm >> 4) & 3) + 1 + (u1_cbp_c << 2) + (u1_cbp_l == 15) * 12; + } + else + { + luma_intra_mode = 0; + } + + isvce_cabac_encode_bin(ps_cabac_ctxt, 1, + ps_cabac_ctxt->au1_cabac_ctxt_table + MB_TYPE_P_SLICE); + + isvce_cabac_enc_intra_mb_type(PSLICE, (UWORD8) luma_intra_mode, ps_cabac_ctxt, + MB_TYPE_P_SLICE); + + if(mb_type == I4x4) + { + isvce_mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = + (isvce_mb_hdr_i4x4_t *) ps_ent_ctxt->pv_mb_header_data; + + isvce_cabac_enc_4x4mb_modes(ps_cabac_ctxt, ps_mb_hdr_i4x4->au1_sub_blk_modes); + } + + chroma_intra_mode = (mb_tpm >> 6); + + isvce_cabac_enc_chroma_predmode(chroma_intra_mode, ps_cabac_ctxt); + } + else + { + UWORD32 u4_ctx_inc_p; + + isvce_mb_hdr_p16x16_t *ps_mb_hdr_p16x16 = + (isvce_mb_hdr_p16x16_t *) ps_ent_ctxt->pv_mb_header_data; + + WORD16 *pi2_mv_ptr = (WORD16 *) ps_mb_hdr_p16x16->ai2_mvd; + + /* Encoding mb_type as P16x16 */ + u4_ctx_inc_p = (0x010 + ((2) << 8)); + + isvce_encode_decision_bins(0, 3, u4_ctx_inc_p, 3, + &(ps_cabac_ctxt->au1_cabac_ctxt_table[MB_TYPE_P_SLICE]), + ps_cabac_ctxt); + + if(ps_ent_ctxt->u1_spatial_layer_id && + ps_svc_slice_header->i1_adaptive_motion_prediction_flag) + { + isvce_cabac_enc_motion_prediction_flag(ps_cabac_ctxt, ps_mb_hdr_p16x16->u1_mvp_idx, + 1); + } + + 
isvce_cabac_enc_mvds_p16x16(ps_cabac_ctxt, pi2_mv_ptr); + } + } + + if(ps_ent_ctxt->u1_spatial_layer_id && (u1_base_mode_flag || !u1_is_intra_mb) && + ps_svc_slice_header->i1_adaptive_residual_prediction_flag) + { + isvce_cabac_enc_residual_prediction_flag(ps_cabac_ctxt, u1_base_mode_flag, + ps_mb_hdr->u1_residual_prediction_flag); + } + + if(u1_base_mode_flag || (mb_type != I16x16)) + { + isvce_cabac_enc_cbp(u1_cbp, ps_cabac_ctxt); + } + + if((u1_cbp > 0) || (mb_type == I16x16)) + { + mb_qp_delta = + ((WORD16) ps_mb_hdr->u1_mb_qp) - ((WORD16) ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp); + + isvce_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt); + ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp = ps_mb_hdr->u1_mb_qp; + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_header_bits[!u1_is_intra_mb] += + bitstream_end_offset - bitstream_start_offset; + + bitstream_start_offset = bitstream_end_offset; + + if(mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + + isvce_cabac_encode_residue_luma_dc(ps_ent_ctxt); + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_AC_CTXCAT); + + pu1_byte += sizeof(isvce_mb_hdr_i16x16_t); + } + else if(mb_type == I4x4) + { + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_4X4_CTXCAT); + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + + pu1_byte += sizeof(isvce_mb_hdr_i4x4_t); + } + else if(mb_type == P16x16) + { + ps_curr_ctxt->u1_mb_type = (CAB_P | CAB_NON_BD16x16); + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_4X4_CTXCAT); + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + + pu1_byte += sizeof(isvce_mb_hdr_p16x16_t); + } + else if(mb_type == BASE_MODE) + { + ps_curr_ctxt->u1_mb_type = (CAB_P | CAB_NON_BD16x16); + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_4X4_CTXCAT); + + 
ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + + pu1_byte += sizeof(isvce_mb_hdr_base_mode_t); + } + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_residue_bits[!u1_is_intra_mb] += + bitstream_end_offset - bitstream_start_offset; + } + else + { + mb_qp_delta = 0; + + if(mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + + pu1_byte += sizeof(isvce_mb_hdr_i16x16_t); + } + else if(mb_type == I4x4) + { + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + + pu1_byte += sizeof(isvce_mb_hdr_i4x4_t); + } + else if(mb_type == P16x16) + { + ps_curr_ctxt->u1_mb_type = (CAB_P | CAB_NON_BD16x16); + + pu1_byte += sizeof(isvce_mb_hdr_p16x16_t); + } + else if(mb_type == BASE_MODE) + { + ps_curr_ctxt->u1_mb_type = (CAB_P | CAB_NON_BD16x16); + + pu1_byte += sizeof(isvce_mb_hdr_base_mode_t); + } + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_header_bits[!u1_is_intra_mb] += + bitstream_end_offset - bitstream_start_offset; + } + + isvce_mb_ctxt_update(ps_cabac_ctxt, ps_curr_ctxt, mb_qp_delta, u1_cbp, u1_base_mode_flag, + mb_type); + + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + return IH264E_SUCCESS; +} + +/* ! 
< Table 9-37 – Binarization for macroblock types in B slices in + * ITU_T_H264-201402 Bits 0-7 : binarised value Bits 8-15: length of binary + * sequence */ + +static const UWORD32 u4_b_mb_type[27] = { + 0x0100, 0x0301, 0x0305, 0x0603, 0x0623, 0x0613, 0x0633, 0x060b, 0x062b, 0x061b, 0x063b, 0x061f, + 0x0707, 0x0747, 0x0727, 0x0767, 0x0717, 0x0757, 0x0737, 0x0777, 0x070f, 0x074f, 0x063f}; +/* CtxInc for mb types in B slices */ +static const UWORD32 ui_b_mb_type_ctx_inc[27] = { + 0x00, 0x0530, 0x0530, 0x0555430, 0x0555430, 0x0555430, 0x0555430, 0x0555430, + 0x0555430, 0x0555430, 0x0555430, 0x0555430, 0x05555430, 0x05555430, 0x05555430, 0x05555430, + 0x05555430, 0x05555430, 0x05555430, 0x05555430, 0x05555430, 0x05555430, 0x0555430}; + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for B slices + * + * @description + * The mb syntax layer for inter slices constitutes luma mb mode, + * mb qp delta, coded block pattern, chroma mb mode and + * luma/chroma residue. 
These syntax elements are written as directed by table + * 7.3.5 of h264 specification + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T isvce_write_bslice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt) +{ + isvce_mb_info_ctxt_t *ps_curr_ctxt; + + WORD32 mb_tpm, mb_type, chroma_intra_mode, luma_intra_mode; + UWORD8 u1_cbp, u1_cbp_l, u1_cbp_c; + WORD8 mb_qp_delta; + WORD32 bitstream_start_offset, bitstream_end_offset; + UWORD8 u1_base_mode_flag; + UWORD8 u1_is_intra_mb; + + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + svc_slice_header_t *ps_svc_slice_header = + ps_ent_ctxt->ps_svc_slice_hdr_base + + (ps_ent_ctxt->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT); + isvce_mb_hdr_common_t *ps_mb_hdr = (isvce_mb_hdr_common_t *) ps_ent_ctxt->pv_mb_header_data; + + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + + if((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB) >= ps_bitstream->u4_max_strm_size) + { + /* return without corrupting the buffer beyond its size */ + return (IH264E_BITSTREAM_BUFFER_OVERFLOW); + } + + /* mb header info */ + mb_tpm = ps_mb_hdr->u1_mb_type_mode; + u1_base_mode_flag = ps_mb_hdr->u1_base_mode_flag; + u1_cbp = ps_mb_hdr->u1_cbp; + u1_cbp_c = (u1_cbp >> 4); + u1_cbp_l = (u1_cbp & 0xF); + + /* mb type */ + mb_type = mb_tpm & 0xF; + u1_is_intra_mb = (mb_type == I16x16) || (mb_type == I8x8) || (mb_type == I4x4); + + /* CABAC contexts for the MB */ + isvce_get_cabac_context(ps_ent_ctxt, mb_type); + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = isvce_get_num_bits(ps_bitstream); + + /* Encode mb_skip_flag */ + isvce_cabac_enc_mb_skip(mb_type == BSKIP, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE); + + if(mb_type == BSKIP) + { + 
ps_curr_ctxt->u1_mb_type = CAB_B_SKIP; + + ps_ent_ctxt->pi4_mb_skip_run[0]++; + + isvce_mb_ctxt_update(ps_cabac_ctxt, ps_curr_ctxt, 0, 0, 0, BSKIP); + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_header_bits[!u1_is_intra_mb] += + bitstream_end_offset - bitstream_start_offset; + + pu1_byte += sizeof(isvce_mb_hdr_bskip_t); + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + return IH264E_SUCCESS; + } + + if(ps_ent_ctxt->u1_spatial_layer_id && ps_svc_slice_header->i1_adaptive_base_mode_flag) + { + isvce_cabac_enc_base_mode_flag(ps_cabac_ctxt, u1_base_mode_flag); + } + + if(!u1_base_mode_flag) + { + if(u1_is_intra_mb) + { + if(mb_type == I16x16) + { + luma_intra_mode = ((mb_tpm >> 4) & 3) + 1 + (u1_cbp_c << 2) + (u1_cbp_l == 15) * 12; + } + else + { + luma_intra_mode = 0; + } + + { + isvce_mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info; + isvce_mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + + UWORD32 u4_ctx_inc = 0; + + if(ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + { + u4_ctx_inc += + ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16) ? 1 : 0; + } + + if(ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + { + u4_ctx_inc += + ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16) ? 
1 : 0; + } + + /* Intra Prefix Only "111101" */ + u4_ctx_inc = (u4_ctx_inc | 0x05555430); + isvce_encode_decision_bins(0x2f, 6, u4_ctx_inc, 3, + ps_cabac_ctxt->au1_cabac_ctxt_table + MB_TYPE_B_SLICE, + ps_cabac_ctxt); + + isvce_cabac_enc_intra_mb_type(BSLICE, (UWORD8) luma_intra_mode, ps_cabac_ctxt, + MB_TYPE_B_SLICE); + } + + if(mb_type == I4x4) + { + isvce_mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = + (isvce_mb_hdr_i4x4_t *) ps_ent_ctxt->pv_mb_header_data; + + isvce_cabac_enc_4x4mb_modes(ps_cabac_ctxt, ps_mb_hdr_i4x4->au1_sub_blk_modes); + } + + chroma_intra_mode = (mb_tpm >> 6); + + isvce_cabac_enc_chroma_predmode(chroma_intra_mode, ps_cabac_ctxt); + } + else if(mb_type == BDIRECT) + { + /* Encoding mb_type as B_Direct_16x16 */ + { + isvce_mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info; + isvce_mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + + UWORD32 u4_ctx_inc = 0; + + if(ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + { + u4_ctx_inc += + ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16) ? 1 : 0; + } + + if(ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + { + u4_ctx_inc += + ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16) ? 
1 : 0; + } + + /* Encode the bin */ + isvce_cabac_encode_bin( + ps_cabac_ctxt, 0, + ps_cabac_ctxt->au1_cabac_ctxt_table + MB_TYPE_B_SLICE + u4_ctx_inc); + } + } + else + { + WORD32 i; + + isvce_mb_hdr_b16x16_t *ps_mb_hdr_b16x16 = + (isvce_mb_hdr_b16x16_t *) ps_ent_ctxt->pv_mb_header_data; + + WORD16 *pi2_mv_ptr = (WORD16 *) ps_mb_hdr_b16x16->ai2_mvd; + WORD32 i4_mb_part_pred_mode = (mb_tpm >> 4); + UWORD32 u4_mb_type = mb_type - B16x16 + B_L0_16x16 + i4_mb_part_pred_mode; + + /* Encoding mb_type as B16x16 */ + { + isvce_mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info; + isvce_mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + UWORD32 u4_ctx_inc = 0; + + UWORD32 u4_mb_type_bins = u4_b_mb_type[u4_mb_type]; + UWORD32 u4_bin_len = (u4_mb_type_bins >> 8) & 0x0F; + u4_mb_type_bins = u4_mb_type_bins & 0xFF; + + if(ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u4_ctx_inc += + ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16) ? 1 : 0; + if(ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u4_ctx_inc += + ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16) ? 1 : 0; + + u4_ctx_inc = u4_ctx_inc | ui_b_mb_type_ctx_inc[u4_mb_type]; + + isvce_encode_decision_bins(u4_mb_type_bins, u4_bin_len, u4_ctx_inc, u4_bin_len, + &(ps_cabac_ctxt->au1_cabac_ctxt_table[MB_TYPE_B_SLICE]), + ps_cabac_ctxt); + } + + for(i = 0; i < NUM_PRED_DIRS; i++) + { + PRED_MODE_T e_pred_mode = (PRED_MODE_T) i; + PRED_MODE_T e_cmpl_pred_mode = (e_pred_mode == L0) ? 
L1 : L0; + + if(((PRED_MODE_T) i4_mb_part_pred_mode) != e_pred_mode) + { + if(ps_svc_slice_header->i1_adaptive_motion_prediction_flag && + ps_ent_ctxt->u1_spatial_layer_id) + { + isvce_cabac_enc_motion_prediction_flag( + ps_cabac_ctxt, ps_mb_hdr_b16x16->au1_mvp_idx[e_cmpl_pred_mode], + e_cmpl_pred_mode == L0); + } + } + } + + isvce_cabac_enc_mvds_b16x16(ps_cabac_ctxt, pi2_mv_ptr, i4_mb_part_pred_mode); + } + } + + if(ps_svc_slice_header->i1_adaptive_residual_prediction_flag && + ps_ent_ctxt->u1_spatial_layer_id && (u1_base_mode_flag || !u1_is_intra_mb)) + { + isvce_cabac_enc_residual_prediction_flag(ps_cabac_ctxt, u1_base_mode_flag, + ps_mb_hdr->u1_residual_prediction_flag); + } + + if(u1_base_mode_flag || (mb_type != I16x16)) + { + isvce_cabac_enc_cbp(u1_cbp, ps_cabac_ctxt); + } + + if((u1_cbp > 0) || (mb_type == I16x16)) + { + mb_qp_delta = + ((WORD16) ps_mb_hdr->u1_mb_qp) - ((WORD16) ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp); + + isvce_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt); + ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp = ps_mb_hdr->u1_mb_qp; + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_header_bits[!u1_is_intra_mb] += + bitstream_end_offset - bitstream_start_offset; + bitstream_start_offset = bitstream_end_offset; + + if(mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + + isvce_cabac_encode_residue_luma_dc(ps_ent_ctxt); + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_AC_CTXCAT); + + pu1_byte += sizeof(isvce_mb_hdr_i16x16_t); + } + else if(mb_type == I4x4) + { + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_4X4_CTXCAT); + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + + pu1_byte += sizeof(isvce_mb_hdr_i4x4_t); + } + else if(mb_type == B16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_NON_BD16x16; + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_4X4_CTXCAT); + + 
ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + + pu1_byte += sizeof(isvce_mb_hdr_b16x16_t); + } + else if(mb_type == BDIRECT) + { + ps_curr_ctxt->u1_mb_type = CAB_BD16x16; + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_4X4_CTXCAT); + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + + pu1_byte += sizeof(isvce_mb_hdr_b16x16_t); + } + else if(mb_type == BASE_MODE) + { + ps_curr_ctxt->u1_mb_type = CAB_NON_BD16x16; + + isvce_cabac_encode_residue(ps_ent_ctxt, u1_cbp, LUMA_4X4_CTXCAT); + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + + pu1_byte += sizeof(isvce_mb_hdr_base_mode_t); + } + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_residue_bits[!u1_is_intra_mb] += + bitstream_end_offset - bitstream_start_offset; + } + else + { + mb_qp_delta = 0; + + if(mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + + pu1_byte += sizeof(isvce_mb_hdr_i16x16_t); + } + else if(mb_type == I4x4) + { + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + + pu1_byte += sizeof(isvce_mb_hdr_i4x4_t); + } + else if(mb_type == B16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_NON_BD16x16; + + pu1_byte += sizeof(isvce_mb_hdr_b16x16_t); + } + else if(mb_type == BDIRECT) + { + ps_curr_ctxt->u1_mb_type = CAB_BD16x16; + + pu1_byte += sizeof(isvce_mb_hdr_b16x16_t); + } + else if(mb_type == BASE_MODE) /* base-mode MB without coded residue: still record its type and skip its header */ + { + ps_curr_ctxt->u1_mb_type = CAB_NON_BD16x16; + + pu1_byte += sizeof(isvce_mb_hdr_base_mode_t); + } + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + ps_ent_ctxt->u4_header_bits[!u1_is_intra_mb] += + bitstream_end_offset - bitstream_start_offset; + } + + isvce_mb_ctxt_update(ps_cabac_ctxt, ps_curr_ctxt, mb_qp_delta, u1_cbp, u1_base_mode_flag, + mb_type); + + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + return IH264E_SUCCESS; +} + +#if ENABLE_RE_ENC_AS_SKIP 
+IH264E_ERROR_T isvce_reencode_as_skip_frame_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt) +{ /* Restores the bitstream state saved after the slice header and codes every MB of the frame as P_Skip */ + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + bitstrm_t *ps_bitstrm = ps_ent_ctxt->ps_bitstrm; + bitstrm_t *ps_bitstrm_after_slice_hdr = ps_ent_ctxt->ps_bitstrm_after_slice_hdr; + + isvce_mb_info_ctxt_t *ps_curr_ctxt; + + slice_header_t *ps_slice_header = + (ps_ent_ctxt->u1_spatial_layer_id == 0) + ? &ps_ent_ctxt->ps_slice_hdr_base[ps_ent_ctxt->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT] + : &ps_ent_ctxt + ->ps_svc_slice_hdr_base[ps_ent_ctxt->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT] + .s_slice_header; + + /* frame dimensions in MBs */ + UWORD32 i4_wd_mbs = ps_ent_ctxt->i4_wd_mbs; + UWORD32 i4_ht_mbs = ps_ent_ctxt->i4_ht_mbs; + UWORD32 i, j; /* as wide as the MB counts: an 8-bit counter wraps and never reaches a count >= 256 */ + + isvce_init_cabac_ctxt(ps_ent_ctxt, ps_slice_header); + + ps_bitstrm->i4_bits_left_in_cw = ps_bitstrm_after_slice_hdr->i4_bits_left_in_cw; + ps_bitstrm->u4_cur_word = ps_bitstrm_after_slice_hdr->u4_cur_word; + ps_bitstrm->u4_strm_buf_offset = ps_bitstrm_after_slice_hdr->u4_strm_buf_offset; + ps_bitstrm->i4_zero_bytes_run = ps_bitstrm_after_slice_hdr->i4_zero_bytes_run; + + for(i = 0; i < i4_ht_mbs; i++) + { + for(j = 0; j < i4_wd_mbs; j++) + { + MBTYPES_T mb_type = PSKIP; + + ps_ent_ctxt->i4_mb_x = j; + ps_ent_ctxt->i4_mb_y = i; + + isvce_get_cabac_context(ps_ent_ctxt, mb_type); + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + isvce_cabac_enc_mb_skip(mb_type == PSKIP, ps_cabac_ctxt, MB_SKIP_FLAG_P_SLICE); + + if(mb_type == PSKIP) + { + ps_curr_ctxt->u1_mb_type = CAB_P_SKIP; + isvce_mb_ctxt_update(ps_cabac_ctxt, ps_curr_ctxt, 0, 0, 0, PSKIP); + } + + /* end_of_slice is 1 only for the last MB of the frame */ + if(j == i4_wd_mbs - 1 && i == i4_ht_mbs - 1) + { + isvce_cabac_encode_terminate(ps_cabac_ctxt, 1); + } + else + { + isvce_cabac_encode_terminate(ps_cabac_ctxt, 0); + } + } + } + return IH264E_SUCCESS; +} +#endif diff --git a/encoder/svc/isvce_cabac_init.c b/encoder/svc/isvce_cabac_init.c new file mode 100644 index 0000000..ea8695b --- /dev/null +++ b/encoder/svc/isvce_cabac_init.c 
@@ -0,0 +1,215 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_cabac_init.c +* +* @brief +* Contains all initialization functions for cabac contexts +* +* @author +* Doney Alex +* +* @par List of Functions: +* +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include +#include +#include +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvc_defs.h" +#include "ih264_debug.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" 
+#include "ih264_deblk_edge_filters.h" +#include "ih264_platform_macros.h" +#include "isvc_macros.h" +#include "ih264_buf_mgr.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "isvc_common_tables.h" +#include "isvc_cabac_tables.h" +#include "ih264_list.h" +#include "isvce_defs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "isvce_cabac.h" +#include "isvce_process.h" +#include "ithread.h" +#include "isvce_encode_header.h" +#include "isvce_globals.h" +#include "ih264e_config.h" +#include "ih264e_trace.h" +#include "ih264e_statistics.h" +#include "ih264_cavlc_tables.h" +#include "isvce_deblk.h" +#include "isvce_me.h" +#include "ih264e_debug.h" +#include "ih264e_master.h" +#include "isvce_utils.h" +#include "irc_mem_req_and_acq.h" +#include "irc_rate_control_api.h" +#include "ih264e_platform_macros.h" +#include "ime_statistics.h" + +/*****************************************************************************/ +/* Function definitions . 
*/ +/*****************************************************************************/ + +/** + ******************************************************************************* + * + * @brief + * Reset the cabac encoding environment: low = 0, range = 0x1fe, no outstanding bytes, no bits generated + * + * @param[out] ps_cab_enc_env + * Pointer to the encoding_envirnoment_t structure that is reset + * + * @returns None + * + * @remarks + * None + * + ******************************************************************************* + */ +static void isvce_init_cabac_enc_envirnoment(encoding_envirnoment_t *ps_cab_enc_env) +{ + ps_cab_enc_env->u4_code_int_low = 0; + ps_cab_enc_env->u4_code_int_range = 0x1fe; + ps_cab_enc_env->u4_out_standing_bytes = 0; + ps_cab_enc_env->u4_bits_gen = 0; +} + +/** + ******************************************************************************* + * + * @brief + * Initialize default context values and pointers (Called once at the beginning + *of encoding). + * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure; the cabac context it points to is updated + * + * @returns None + * + * @remarks + * ps_mb_map_ctxt_inc_base must already point to the mb info map before this is called + * + ******************************************************************************* + */ +void isvce_init_cabac_table(isvce_entropy_ctxt_t *ps_ent_ctxt) +{ + /* CABAC context */ + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + ps_cabac_ctxt->ps_mb_map_ctxt_inc = ps_cabac_ctxt->ps_mb_map_ctxt_inc_base + 1; + ps_cabac_ctxt->ps_lft_csbp = &ps_cabac_ctxt->s_lft_csbp; + ps_cabac_ctxt->ps_bitstrm = ps_ent_ctxt->ps_bitstrm; + + { + /* Entry 0 of the base map (one before ps_mb_map_ctxt_inc) always holds the default values + */ + /* used as the CABAC context of an MB that is not available */ + isvce_mb_info_ctxt_t *ps_def_ctxt = ps_cabac_ctxt->ps_mb_map_ctxt_inc - 1; + + ps_def_ctxt->u1_mb_type = CAB_SKIP; + ps_def_ctxt->u1_cbp = 0x0f; + ps_def_ctxt->u1_intrapred_chroma_mode = 0; + ps_def_ctxt->u1_base_mode_flag = 0; + + memset(ps_def_ctxt->i1_ref_idx, 0, sizeof(ps_def_ctxt->i1_ref_idx)); + memset(ps_def_ctxt->u1_mv, 0, sizeof(ps_def_ctxt->u1_mv)); + 
ps_cabac_ctxt->ps_def_ctxt_mb_info = ps_def_ctxt; + } +} + +/** + ******************************************************************************* + * + * @brief + * Initialize cabac context: Initialize all contexts with init values given in + *the spec. Called at the beginning of entropy coding of each slice for CABAC + *encoding. + * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure whose cabac context is re-initialised + * @param[in] ps_slice_hdr : slice header; its slice type, slice qp and cabac_init_idc are read + * @returns None + * + * @remarks + * I slices use init table index 3, other slice types use i1_cabac_init_idc + * + ******************************************************************************* + */ +void isvce_init_cabac_ctxt(isvce_entropy_ctxt_t *ps_ent_ctxt, slice_header_t *ps_slice_hdr) +{ + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + + const UWORD8 u1_slice_type = ps_slice_hdr->u1_slice_type; + WORD8 i1_cabac_init_idc = 0; + bin_ctxt_model *au1_cabac_ctxt_table = ps_cabac_ctxt->au1_cabac_ctxt_table; + UWORD8 u1_qp_y = ps_slice_hdr->i1_slice_qp; + + isvce_init_cabac_enc_envirnoment(&ps_cabac_ctxt->s_cab_enc_env); + + ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt = 0; + + if(ISLICE != u1_slice_type) + { + i1_cabac_init_idc = ps_slice_hdr->i1_cabac_init_idc; + } + else + { + i1_cabac_init_idc = 3; + } + + memcpy(au1_cabac_ctxt_table, gau1_isvc_cabac_ctxt_init_table[i1_cabac_init_idc][u1_qp_y], + NUM_SVC_CABAC_CTXTS * sizeof(bin_ctxt_model)); +} diff --git a/encoder/svc/isvce_cabac_structs.h b/encoder/svc/isvce_cabac_structs.h new file mode 100644 index 0000000..4c7d208 --- /dev/null +++ b/encoder/svc/isvce_cabac_structs.h @@ -0,0 +1,142 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** + ******************************************************************************* + * @file + * isvce_cabac_structs.h + * + * @brief + * This file contains cabac related structure definitions. + * + * @author + * Doney Alex + * + * @remarks + * none + * + ******************************************************************************* + */ + +#ifndef _ISVCE_CABAC_STRUCTS_H_ +#define _ISVCE_CABAC_STRUCTS_H_ + +#include "ih264_typedefs.h" +#include "isvc_cabac_tables.h" +#include "ih264e_bitstream.h" +#include "ih264e_cabac_structs.h" + +/** + ****************************************************************************** + * @brief Per-MB info kept for cabac context selection + ****************************************************************************** + */ +typedef struct isvce_mb_info_ctxt_t +{ + /* Neighbour availability Variables needed to get CtxtInc, for CABAC */ + UWORD8 u1_mb_type; /* !< macroblock type, stored as a CAB_* code (e.g. CAB_I4x4, CAB_BD16x16) */ + + UWORD8 u1_cbp; /* !< Coded Block Pattern */ + UWORD8 u1_intrapred_chroma_mode; + + /*************************************************************************/ + /* Arrangement of AC CSBP */ + /* bits: b7 b6 b5 b4 b3 b2 b1 b0 */ + /* CSBP: V1 V0 U1 U0 Y3 Y2 Y1 Y0 */ + /*************************************************************************/ + UWORD8 u1_yuv_ac_csbp; + /*************************************************************************/ + /* Arrangement of DC CSBP */ + 
/* bits: b7 b6 b5 b4 b3 b2 b1 b0 */ + /* CSBP: x x x x x Vdc Udc Ydc */ + /*************************************************************************/ + UWORD8 u1_yuv_dc_csbp; + + WORD8 i1_ref_idx[4]; + UWORD8 u1_mv[4][4]; + + UWORD8 u1_base_mode_flag; +} isvce_mb_info_ctxt_t; + +/** + ****************************************************************************** + * @brief CABAC Context structure : Variables to handle Cabac + ****************************************************************************** + */ +typedef struct isvce_cabac_ctxt_t +{ + /* Table holding all the cabac context models */ + bin_ctxt_model au1_cabac_ctxt_table[NUM_SVC_CABAC_CTXTS]; + + cab_csbp_t s_lft_csbp; + + /** + * pointer to Bitstream structure + */ + bitstrm_t *ps_bitstrm; + + /* Base of the mb_info_ctxt_t map; entry 0 is the default (MB not available) context */ + isvce_mb_info_ctxt_t *ps_mb_map_ctxt_inc_base; + + /* CABAC encoding environment, stored by value */ + encoding_envirnoment_t s_cab_enc_env; + + /* These things need to be updated at each MbLevel */ + + /* Prev ps_mb_qp_delta_ctxt */ + WORD8 i1_prevps_mb_qp_delta_ctxt; + + /* Pointer to mb_info_ctxt_t map (ps_mb_map_ctxt_inc_base + 1) */ + isvce_mb_info_ctxt_t *ps_mb_map_ctxt_inc; + + /* Pointer to default mb_info_ctxt_t */ + isvce_mb_info_ctxt_t *ps_def_ctxt_mb_info; + + /* Pointer to current mb_info_ctxt_t */ + isvce_mb_info_ctxt_t *ps_curr_ctxt_mb_info; + + /* Pointer to left mb_info_ctxt_t */ + isvce_mb_info_ctxt_t *ps_left_ctxt_mb_info; + + /* Pointer to top mb_info_ctxt_t */ + isvce_mb_info_ctxt_t *ps_top_ctxt_mb_info; + + /* Pointer to left csbp structure */ + cab_csbp_t *ps_lft_csbp; + UWORD8 *pu1_left_y_ac_csbp; + UWORD8 *pu1_left_uv_ac_csbp; + UWORD8 *pu1_left_yuv_dc_csbp; + + /***************************************************************************/ + /* Ref_idx contexts are stored in the following way */ + /* Array Idx 0,1 for reference indices in Forward direction */ + /* Array Idx 2,3 for reference indices in backward direction */ + 
/***************************************************************************/ + /* Dimensions of i1_left_ref_idx_ctx_inc_arr are [2][4] for Mbaff:Top and Bot */ + WORD8 i1_left_ref_idx_ctx_inc_arr[2][4]; + WORD8 *pi1_left_ref_idx_ctxt_inc; + + /* Dimensions of u1_left_mv_ctxt_inc_arr are [2][4][4] for Mbaff case */ + UWORD8 u1_left_mv_ctxt_inc_arr[2][4][4]; + UWORD8 (*pu1_left_mv_ctxt_inc)[4]; + +} isvce_cabac_ctxt_t; + +#endif diff --git a/encoder/svc/isvce_cabac_utils.h b/encoder/svc/isvce_cabac_utils.h new file mode 100644 index 0000000..ffd05ed --- /dev/null +++ b/encoder/svc/isvce_cabac_utils.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_cabac_utils.h +* +* @brief +* Contains static inline helpers that cabac-encode the SVC flags +* base_mode_flag, residual_prediction_flag and motion_prediction_flag +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_CABAC_UTILS_H_ +#define _ISVCE_CABAC_UTILS_H_ + +#include "ih264_typedefs.h" +#include "isvc_macros.h" +#include "isvc_defs.h" +#include "isvc_cabac_tables.h" +#include "isvce_cabac_structs.h" +#include "isvce_cabac.h" +/* Encodes base_mode_flag as one bin; the context increment counts the left/top contexts whose u1_base_mode_flag is 0 */ +static FORCEINLINE void isvce_cabac_enc_base_mode_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt, + UWORD8 u1_base_mode_flag) +{ + UWORD8 u1_ctx_inc; + UWORD8 u1_a, u1_b; + + const UWORD32 u4_ctxidx_offset = BASE_MODE_FLAG; + + u1_a = !ps_cabac_ctxt->ps_left_ctxt_mb_info->u1_base_mode_flag; + u1_b = !ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_base_mode_flag; + + u1_ctx_inc = u1_a + u1_b; + + isvce_cabac_encode_bin(ps_cabac_ctxt, u1_base_mode_flag, + ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctxidx_offset + u1_ctx_inc); +} +/* Encodes residual_prediction_flag as one bin; the context increment is 0 when u1_base_mode_flag is set, else 1 */ +static FORCEINLINE void isvce_cabac_enc_residual_prediction_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt, + UWORD8 u1_base_mode_flag, + UWORD8 u1_residual_prediction_flag) +{ + const UWORD32 u4_ctxidx_offset = RESIDUAL_PREDICTION_FLAG; + UWORD8 u1_ctx_inc = !u1_base_mode_flag; + + isvce_cabac_encode_bin(ps_cabac_ctxt, u1_residual_prediction_flag, + ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctxidx_offset + u1_ctx_inc); +} +/* Encodes a motion prediction flag as one bin, using the L0 context when u1_is_l0_mvp is non-zero and the L1 context otherwise */ +static FORCEINLINE void isvce_cabac_enc_motion_prediction_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt, + UWORD8 u1_motion_prediction_flag, + UWORD8 u1_is_l0_mvp) +{ + const UWORD32 u4_ctxidx_offset = + u1_is_l0_mvp ? 
MOTION_PREDICTION_FLAG_L0 : MOTION_PREDICTION_FLAG_L1; + + isvce_cabac_encode_bin(ps_cabac_ctxt, u1_motion_prediction_flag, + ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctxidx_offset); +} + +#endif diff --git a/encoder/svc/isvce_cavlc.c b/encoder/svc/isvce_cavlc.c new file mode 100644 index 0000000..71122ed --- /dev/null +++ b/encoder/svc/isvce_cavlc.c @@ -0,0 +1,2021 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_cavlc.c +* +* @brief +* Contains all the routines to code syntax elements and residuals when entropy +* coding chosen is CAVLC +* +* @author +* ittiam +* +* @par List of Functions: +* - isvce_compute_zeroruns_and_trailingones() +* - isvce_write_coeff4x4_cavlc() +* - isvce_write_coeff8x8_cavlc() +* - isvce_encode_residue() +* - isvce_write_islice_mb_cavlc() +* - isvce_write_pslice_mb_cavlc() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_debug.h" +#include "isvc_macros.h" +#include "isvc_defs.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "isvce_encode_header.h" +#include "ih264_cavlc_tables.h" +#include "isvce_cavlc.h" +#include "ih264e_statistics.h" +#include "ih264e_trace.h" +#include "isvce_encode_header.h" +#include "isvce_utils.h" + 
+/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* This function computes run of zero, number of trailing ones and sign of +* trailing ones based on the significant coeff map, residual block and +* total nnz. +* +* @param[in] pi2_res_block +* Pointer to residual block containing levels in scan order +* +* @param[in] u4_total_coeff +* Total non-zero coefficients in that sub block +* +* @param[out] pu1_zero_run +* Array receiving one run of zeros per non-zero coefficient; cleared first +* +* @param[in] u4_sig_coeff_map +* significant coefficient map, consumed from bit 0 upwards +* +* @returns u4_totzero_sign_trailone +* Bits 0-7 contain the number of trailing ones. +* Bits 8-15 contain the bitwise sign information of the trailing ones. +* Bits 16 and above contain the total number of zeros. +* +* @remarks +* NOTE(review): pu1_zero_run is cleared through a UWORD32 pointer (4 words), so it presumably must be word aligned and that large - confirm with callers +* +******************************************************************************* +*/ +static UWORD32 isvce_compute_zeroruns_and_trailingones(WORD16 *pi2_res_block, + UWORD32 u4_total_coeff, UWORD8 *pu1_zero_run, + UWORD32 u4_sig_coeff_map) +{ + UWORD32 i = 0; + UWORD32 u4_nnz_coeff = 0; + WORD32 i4_run = -1; + UWORD32 u4_sign = 0; + UWORD32 u4_tot_zero = 0; + UWORD32 u4_trailing1 = 0; + WORD32 i4_val; + UWORD32 u4_totzero_sign_trailone; + UWORD32 *pu4_zero_run; + + pu4_zero_run = (void *) pu1_zero_run; + pu4_zero_run[0] = 0; + pu4_zero_run[1] = 0; + pu4_zero_run[2] = 0; + pu4_zero_run[3] = 0; + + /* Compute Runs of zeros for all nnz coefficients except the last 3 */ + if(u4_total_coeff > 3) + { + for(i = 0; u4_nnz_coeff < (u4_total_coeff - 3); i++) + { + i4_run++; + + i4_val = (u4_sig_coeff_map & 0x1); + u4_sig_coeff_map >>= 1; + + if(i4_val != 0) + { + pu1_zero_run[u4_nnz_coeff++] = i4_run; + i4_run = -1; + } + } + } + + /* Compute T1's, Signof(T1's) and Runs of zeros for the last 3 */ + 
while(u4_nnz_coeff != u4_total_coeff)
+    {
+        i4_run++;
+
+        i4_val = (u4_sig_coeff_map & 0x1);
+        u4_sig_coeff_map >>= 1;
+
+        if(i4_val != 0)
+        {
+            if(pi2_res_block[u4_nnz_coeff] == 1)
+            {
+                pu1_zero_run[u4_nnz_coeff] = i4_run;
+                u4_trailing1++;
+            }
+            else
+            {
+                if(pi2_res_block[u4_nnz_coeff] == -1)
+                {
+                    pu1_zero_run[u4_nnz_coeff] = i4_run;
+                    u4_sign |= 1 << u4_trailing1;
+                    u4_trailing1++;
+                }
+                else
+                {
+                    pu1_zero_run[u4_nnz_coeff] = i4_run;
+                    u4_trailing1 = 0;
+                    u4_sign = 0;
+                }
+            }
+            i4_run = -1;
+            u4_nnz_coeff++;
+        }
+        i++;
+    }
+
+    u4_tot_zero = i - u4_total_coeff;
+    u4_totzero_sign_trailone = (u4_tot_zero << 16) | (u4_sign << 8) | u4_trailing1;
+
+    return (u4_totzero_sign_trailone);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for the given residual block
+*
+* @description
+*  Writes, in this order: coeff_token, the sign bits of the trailing ones, the
+*  level codes of the remaining coefficients (walking pi2_res_block from index
+*  total_coeff - trailing_ones - 1 down to 0), total zeros (only when the block
+*  holds fewer coefficients than the maximum for its block type) and the
+*  run-before codes (only when total zeros is non-zero). A block without any
+*  coefficient emits just the coeff_token and returns.
+*
+* @param[in] pi2_res_block
+*  Pointer to residual block containing levels in scan order
+*
+* @param[in] u4_total_coeff
+*  Total non-zero coefficients in the sub block
+*
+* @param[in] u4_block_type
+*  block type. Used as index into a 5 entry table of maximum coefficient
+*  counts, hence asserted to be <= CAVLC_CHROMA_4x4_AC
+*
+* @param[out] pu1_zero_run
+*  Pointer to array to store run of zeros. Filled through
+*  isvce_compute_zeroruns_and_trailingones() and read back here while writing
+*  the run-before codes
+*
+* @param[in] u4_nc
+*  average of non zero coeff from top and left blocks (when available).
+*  u4_nc >> 1 selects the coeff_token table: values above 3 use the 6 bit
+*  fixed length code, values 2 and 3 share table 2
+*
+* @param[in, out] ps_bit_stream
+*  structure pointing to a buffer holding output bit stream
+*
+* @param[in] u4_sig_coeff_map
+*  significant coefficient map of the residual block
+*
+* @returns
+*  error code, as returned by the last ih264e_put_bits() call made for the
+*  block
+*
+* @remarks
+*  If the block type is CAVLC_CHROMA_4x4_DC, then u4_nc is non-significant
+*
+*  NOTE(review): error_status is reassigned by every ih264e_put_bits() call, so
+*  a failure reported by an earlier write is replaced by the result of a later
+*  one - confirm that this is intended.
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T isvce_write_coeff4x4_cavlc(WORD16 *pi2_res_block, UWORD32 u4_total_coeff,
+                                                 ENTROPY_BLK_TYPE u4_block_type,
+                                                 UWORD8 *pu1_zero_run, UWORD32 u4_nc,
+                                                 bitstrm_t *ps_bit_stream, UWORD32 u4_sig_coeff_map)
+{
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+    UWORD32 u4_totzero_sign_trailone = 0;
+    UWORD32 u4_trailing_ones = 0;
+    UWORD32 u4_tot_zeros = 0;
+    UWORD32 u4_remaining_coeff = 0;
+    UWORD32 u4_sign1 = 0;
+    UWORD32 u4_max_num_coeff = 0;
+    const UWORD32 au4_max_num_nnz_coeff[] = {16, 15, 16, 4, 15}; /* indexed by u4_block_type */
+
+    /* validate inputs */
+    ASSERT(u4_block_type <= CAVLC_CHROMA_4x4_AC);
+
+    u4_max_num_coeff = au4_max_num_nnz_coeff[u4_block_type];
+
+    ASSERT(u4_total_coeff <= u4_max_num_coeff);
+
+    if(!u4_total_coeff)
+    {
+        UWORD32 u4_codeword = 15;
+        UWORD32 u4_codesize = 1;
+        if(u4_block_type == CAVLC_CHROMA_4x4_DC)
+        {
+            u4_codeword = 1;
+            u4_codesize = 2;
+            DEBUG("\n[%d numcoeff, %d numtrailing ones]", u4_total_coeff, 0);
+            ENTROPY_TRACE("\tnumber of non zero coeffs ", u4_total_coeff);
+            ENTROPY_TRACE("\tnumber of trailing ones ", 0);
+        }
+        else
+        {
+            UWORD32 u4_vlcnum = u4_nc >> 1;
+
+            /* write coeff_token */
+            if(u4_vlcnum > 3)
+            {
+                /* Num-FLC */
+                u4_codeword = 3;
+                u4_codesize = 6;
+            }
+            else
+            {
+                /* Num-VLC 0, 1, 2 */
+                if(u4_vlcnum > 1)
+                {
+                    u4_vlcnum = 2;
+                }
+                u4_codesize <<= u4_vlcnum;          /* 1, 2 or 4 bits */
+                u4_codeword >>= (4 - u4_codesize); /* 15 >> 3, 15 >> 2 or 15 >> 0 : all ones */
+            }
+
+            DEBUG("\n[%d numcoeff, %d numtrailing ones, %d nnz]", u4_total_coeff, 0, u4_nc);
+            ENTROPY_TRACE("\tnumber of non zero coeffs ", u4_total_coeff);
+            ENTROPY_TRACE("\tnC ", u4_nc);
+        }
+
+        DEBUG("\nCOEFF TOKEN 0: %d u4_codeword, %d u4_codesize", u4_codeword, u4_codesize);
+        ENTROPY_TRACE("\tcodeword ", u4_codeword);
+        ENTROPY_TRACE("\tcodesize ", u4_codesize);
+
+        error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+
+        return error_status;
+    }
+    else
+    {
+        /* Compute zero run, number of trailing ones and their sign.
+         * The helper packs its result as: bits 0-7 number of trailing ones,
+         * bits 8-15 sign bits of the trailing ones, bits 16-23 total zeros */
+        u4_totzero_sign_trailone = isvce_compute_zeroruns_and_trailingones(
+            pi2_res_block, u4_total_coeff, pu1_zero_run, u4_sig_coeff_map);
+        u4_trailing_ones = u4_totzero_sign_trailone & 0xFF;
+        u4_sign1 = (u4_totzero_sign_trailone >> 8) & 0xFF;
+        u4_tot_zeros = (u4_totzero_sign_trailone >> 16) & 0xFF;
+        u4_remaining_coeff = u4_total_coeff - u4_trailing_ones;
+
+        /* write coeff_token */
+        {
+            UWORD32 u4_codeword;
+            UWORD32 u4_codesize;
+            if(u4_block_type == CAVLC_CHROMA_4x4_DC)
+            {
+                u4_codeword =
+                    gu1_code_coeff_token_table_chroma[u4_trailing_ones][u4_total_coeff - 1];
+                u4_codesize =
+                    gu1_size_coeff_token_table_chroma[u4_trailing_ones][u4_total_coeff - 1];
+
+                DEBUG("\n[%d numcoeff, %d numtrailing ones]", u4_total_coeff, u4_trailing_ones);
+                ENTROPY_TRACE("\tnumber of non zero coeffs ", u4_total_coeff);
+                ENTROPY_TRACE("\tnumber of trailing ones ", u4_trailing_ones);
+            }
+            else
+            {
+                UWORD32 u4_vlcnum = u4_nc >> 1;
+
+                if(u4_vlcnum > 3)
+                {
+                    /* Num-FLC */
+                    u4_codeword = ((u4_total_coeff - 1) << 2) + u4_trailing_ones;
+                    u4_codesize = 6;
+                }
+                else
+                {
+                    /* Num-VLC 0, 1, 2 */
+                    if(u4_vlcnum > 1)
+                    {
+                        u4_vlcnum = 2;
+                    }
+                    u4_codeword =
+                        gu1_code_coeff_token_table[u4_vlcnum][u4_trailing_ones][u4_total_coeff - 1];
+                    u4_codesize =
+                        gu1_size_coeff_token_table[u4_vlcnum][u4_trailing_ones][u4_total_coeff - 1];
+                }
+
+                DEBUG("\n[%d numcoeff, %d numtrailing ones, %d nnz]", u4_total_coeff,
+                      u4_trailing_ones, u4_nc);
+                ENTROPY_TRACE("\tnumber of non zero coeffs ", u4_total_coeff);
+                ENTROPY_TRACE("\tnumber of trailing ones ", u4_trailing_ones);
+                ENTROPY_TRACE("\tnC ", u4_nc);
+            }
+
+            DEBUG("\nCOEFF TOKEN 0: %d u4_codeword, %d u4_codesize", u4_codeword, u4_codesize);
+            ENTROPY_TRACE("\tcodeword ", u4_codeword);
+            ENTROPY_TRACE("\tcodesize ", u4_codesize);
+
+            error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+        }
+
+        /* write sign of trailing ones (one bit per trailing one) */
+        if(u4_trailing_ones)
+        {
+            DEBUG("\nT1's: %d u4_codeword, %d u4_codesize", u4_sign1,
+                  u4_trailing_ones);
+            error_status = ih264e_put_bits(ps_bit_stream, u4_sign1, u4_trailing_ones);
+            ENTROPY_TRACE("\tnumber of trailing ones ", u4_trailing_ones);
+            ENTROPY_TRACE("\tsign of trailing ones ", u4_sign1);
+        }
+
+        /* write level codes */
+        if(u4_remaining_coeff)
+        {
+            WORD32 i4_level = pi2_res_block[u4_remaining_coeff - 1];
+            UWORD32 u4_escape;
+            UWORD32 u4_suffix_length = 0;  // Level-VLC[N]
+            UWORD32 u4_abs_level, u4_abs_level_actual = 0;
+            WORD32 i4_sign;
+            const UWORD32 u4_rndfactor[] = {0, 0, 1, 3, 7, 15, 31}; /* indexed by suffix length */
+
+            DEBUG("\n \t%d coeff,", i4_level);
+            ENTROPY_TRACE("\tcoeff ", i4_level);
+
+            if(u4_trailing_ones < 3)
+            {
+                /* If there are less than 3 T1s, then the first non-T1 level is
+                 * incremented if negative (decremented if positive)*/
+                if(i4_level < 0)
+                {
+                    i4_level += 1;
+                }
+                else
+                {
+                    i4_level -= 1;
+                }
+
+                u4_abs_level_actual = 1; /* restores the unit removed above once abs is added */
+
+                /* Initialize VLC table (Suffix Length) to encode the level */
+                if(u4_total_coeff > 10)
+                {
+                    u4_suffix_length = 1;
+                }
+            }
+
+            i4_sign = (i4_level >> (sizeof(WORD32) * CHAR_BIT - 1)); /* sign mask: 0, or -1 when
+                                                                        the shift is arithmetic */
+            u4_abs_level = ((i4_level + i4_sign) ^ i4_sign);         /* absolute value */
+
+            u4_abs_level_actual += u4_abs_level;
+
+            u4_escape = (u4_abs_level + u4_rndfactor[u4_suffix_length]) >> u4_suffix_length;
+
+            while(1)
+            {
+                UWORD32 u4_codesize;
+                UWORD32 u4_codeword;
+                UWORD32 u4_codeval;
+
+                u4_remaining_coeff--;
+
+                GATHER_CAVLC_STATS1();
+
+                {
+                    u4_codeval = u4_abs_level << 1;
+                    u4_codeval = u4_codeval - 2 - i4_sign; /* 2 * abs - 2, plus 1 when negative */
+
+                    if((!u4_suffix_length) && (u4_escape > 7) && (u4_abs_level < 16))
+                    {
+                        u4_codeword = (1 << 4) + (u4_codeval - 14);
+                        u4_codesize = 19;
+                    }
+                    else if(u4_escape > 7)
+                    {
+                        u4_codeword = (1 << 12) + (u4_codeval - (15 << u4_suffix_length));
+                        u4_codesize = 28;
+                        if(!u4_suffix_length)
+                        {
+                            u4_codeword -= 15;
+                        }
+                    }
+                    else
+                    {
+                        u4_codeword =
+                            (1 << u4_suffix_length) + (u4_codeval & ((1 << u4_suffix_length) - 1));
+                        u4_codesize = (u4_codeval >> u4_suffix_length) + 1 + u4_suffix_length;
+                    }
+                }
+
+                /* put the level code in bitstream */
+                DEBUG("\nLEVEL: %d u4_codeword, %d u4_codesize", u4_codeword, u4_codesize);
+                ENTROPY_TRACE("\tcodeword ", u4_codeword);
+                ENTROPY_TRACE("\tcodesize ", u4_codesize);
+                error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+
+                if(u4_remaining_coeff == 0) break;
+
+                /* update suffix length for next level: it never decreases and
+                 * grows by at most one per coded level beyond the initial 0 -> 1 step */
+                if(u4_suffix_length == 0)
+                {
+                    u4_suffix_length++;
+                }
+                if(u4_suffix_length < 6)
+                {
+                    if(u4_abs_level_actual > gu1_threshold_vlc_level[u4_suffix_length])
+                    {
+                        u4_suffix_length++;
+                    }
+                }
+
+                /* next level */
+                i4_level = pi2_res_block[u4_remaining_coeff - 1];
+
+                DEBUG("\n \t%d coeff,", i4_level);
+                ENTROPY_TRACE("\tcoeff ", i4_level);
+
+                i4_sign = (i4_level >> (sizeof(WORD32) * CHAR_BIT - 1));
+                u4_abs_level = ((i4_level + i4_sign) ^ i4_sign);
+
+                u4_abs_level_actual = u4_abs_level;
+
+                u4_escape = (u4_abs_level + u4_rndfactor[u4_suffix_length]) >> u4_suffix_length;
+            }
+        }
+
+        DEBUG("\n \t %d totalzeros", u4_tot_zeros);
+        ENTROPY_TRACE("\ttotal zeros ", u4_tot_zeros);
+
+        /* Write Total Zeros (skipped when the block already holds the maximum
+         * number of coefficients for its type) */
+        if(u4_total_coeff < u4_max_num_coeff)
+        {
+            WORD32 index;
+            UWORD32 u4_codeword;
+            UWORD32 u4_codesize;
+
+            if(u4_block_type == CAVLC_CHROMA_4x4_DC)
+            {
+                UWORD8 gu1_index_zero_table_chroma[] = {0, 4, 7}; /* table offset per total_coeff */
+                index = gu1_index_zero_table_chroma[u4_total_coeff - 1] + u4_tot_zeros;
+                u4_codesize = gu1_size_zero_table_chroma[index];
+                u4_codeword = gu1_code_zero_table_chroma[index];
+            }
+            else
+            {
+                index = gu1_index_zero_table[u4_total_coeff - 1] + u4_tot_zeros;
+                u4_codesize = gu1_size_zero_table[index];
+                u4_codeword = gu1_code_zero_table[index];
+            }
+
+            DEBUG("\nTOTAL ZEROS: %d u4_codeword, %d u4_codesize", u4_codeword, u4_codesize);
+            ENTROPY_TRACE("\tcodeword ", u4_codeword);
+            ENTROPY_TRACE("\tcodesize ", u4_codesize);
+            error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+        }
+
+        /* Write Run Before: walks pu1_zero_run from the last coefficient down
+         * to index 1 and stops early once no zeros are left to signal */
+        if(u4_tot_zeros)
+        {
+            UWORD32 u4_max_num_coef = u4_total_coeff - 1;
+            UWORD32 u4_codeword;
+            UWORD32 u4_codesize;
+            UWORD32 u4_zeros_left = u4_tot_zeros;
+
+            while(u4_max_num_coef)
+            {
+                UWORD32 u4_run_before = pu1_zero_run[u4_max_num_coef];
+                UWORD32 u4_index;
+
+                if(u4_zeros_left > MAX_ZERO_LEFT)
+                {
+                    u4_index = gu1_index_run_table[MAX_ZERO_LEFT];
+                }
+                else
+                {
+                    u4_index = gu1_index_run_table[u4_zeros_left - 1];
+                }
+
+                u4_codesize = gu1_size_run_table[u4_index + u4_run_before];
+                u4_codeword = gu1_code_run_table[u4_index + u4_run_before];
+
+                DEBUG("\nRUN BEFORE ZEROS: %d u4_codeword, %d u4_codesize", u4_codeword,
+                      u4_codesize);
+                ENTROPY_TRACE("\tcodeword ", u4_codeword);
+                ENTROPY_TRACE("\tcodesize ", u4_codesize);
+                error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+
+                u4_zeros_left -= u4_run_before;
+                if(!u4_zeros_left)
+                {
+                    break;
+                }
+                u4_max_num_coef--;
+            }
+        }
+    }
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for the given subblock
+*
+* @description
+*  Codes the four 4x4 partitions of one 8x8 sub block. For every partition the
+*  context value nC is the sum of the left and top nnz of the neighbours that
+*  are flagged available, replaced by their rounded-up average when both are
+*  available.
+*
+* @param[in] ps_ent_ctxt
+*  Pointer to entropy context
+*
+* @param[in] pi2_res_block
+*  Pointers to residual blocks of all the partitions for the current subblk
+*  (containing levels in scan order)
+*
+* @param[in] pu1_nnz
+*  Total non-zero coefficients of all the partitions for the current subblk
+*
+* @param[in] pu2_sig_coeff_map
+*  Significant coefficient map of all the partitions for the current subblk
+*
+* @param[in] u4_block_type
+*  entropy coding block type
+*
+* @param[in] u4_ngbr_avlb
+*  top and left availability of all the partitions for the current subblk
+*  (packed). Read one byte per partition: a non-zero low nibble marks the left
+*  neighbour as available, a non-zero high nibble marks the top neighbour as
+*  available
+*
+* @param[in, out] pu1_top_nnz
+*  pointer to the buffer containing nnz of all the subblks to the top. The two
+*  entries used are overwritten with the nnz of the partitions coded here
+*
+* @param[in, out] pu1_left_nnz
+*  pointer to the buffer containing nnz of all the subblks to the left. The two
+*  entries used are overwritten with the nnz of the partitions coded here
+*
+* @returns error status of the last partition written
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T isvce_write_coeff8x8_cavlc(isvce_entropy_ctxt_t
*ps_ent_ctxt,
+                                                 WORD16 **pi2_res_block, UWORD8 *pu1_nnz,
+                                                 UWORD16 *pu2_sig_coeff_map,
+                                                 ENTROPY_BLK_TYPE u4_block_type,
+                                                 UWORD32 u4_ngbr_avlb, UWORD8 *pu1_top_nnz,
+                                                 UWORD8 *pu1_left_nnz)
+{
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+    bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+    UWORD8 *pu1_zero_run = ps_ent_ctxt->au1_zero_run, *pu1_ngbr_avbl;
+    UWORD32 u4_nC;
+    UWORD8 u1_mb_a, u1_mb_b;
+
+    pu1_ngbr_avbl = (void *) (&u4_ngbr_avlb); /* byte view: one availability byte per 4x4 */
+
+    /* encode block index 4x4 = 0 (uses left[0], top[0]) */
+    u1_mb_a = pu1_ngbr_avbl[0] & 0x0F;
+    u1_mb_b = pu1_ngbr_avbl[0] & 0xF0;
+    u4_nC = 0;
+    if(u1_mb_a) u4_nC += pu1_left_nnz[0];
+    if(u1_mb_b) u4_nC += pu1_top_nnz[0];
+    if(u1_mb_a && u1_mb_b) u4_nC = (u4_nC + 1) >> 1;
+    pu1_left_nnz[0] = pu1_top_nnz[0] = pu1_nnz[0];
+    error_status =
+        isvce_write_coeff4x4_cavlc(pi2_res_block[0], pu1_nnz[0], u4_block_type, pu1_zero_run, u4_nC,
+                                   ps_bitstream, pu2_sig_coeff_map[0]);
+
+    /* encode block index 4x4 = 1 (uses left[0], top[1]) */
+    u1_mb_a = pu1_ngbr_avbl[1] & 0x0F;
+    u1_mb_b = pu1_ngbr_avbl[1] & 0xF0;
+    u4_nC = 0;
+    if(u1_mb_a) u4_nC += pu1_left_nnz[0];
+    if(u1_mb_b) u4_nC += pu1_top_nnz[1];
+    if(u1_mb_a && u1_mb_b) u4_nC = (u4_nC + 1) >> 1;
+    pu1_left_nnz[0] = pu1_top_nnz[1] = pu1_nnz[1];
+    error_status =
+        isvce_write_coeff4x4_cavlc(pi2_res_block[1], pu1_nnz[1], u4_block_type, pu1_zero_run, u4_nC,
+                                   ps_bitstream, pu2_sig_coeff_map[1]);
+
+    /* encode block index 4x4 = 2 (uses left[1], top[0]) */
+    u1_mb_a = pu1_ngbr_avbl[2] & 0x0F;
+    u1_mb_b = pu1_ngbr_avbl[2] & 0xF0;
+    u4_nC = 0;
+    if(u1_mb_a) u4_nC += pu1_left_nnz[1];
+    if(u1_mb_b) u4_nC += pu1_top_nnz[0];
+    if(u1_mb_a && u1_mb_b) u4_nC = (u4_nC + 1) >> 1;
+    pu1_left_nnz[1] = pu1_top_nnz[0] = pu1_nnz[2];
+    error_status =
+        isvce_write_coeff4x4_cavlc(pi2_res_block[2], pu1_nnz[2], u4_block_type, pu1_zero_run, u4_nC,
+                                   ps_bitstream, pu2_sig_coeff_map[2]);
+
+    /* encode block index 4x4 = 3 (uses left[1], top[1]) */
+    u1_mb_a = pu1_ngbr_avbl[3] & 0x0F;
+    u1_mb_b = pu1_ngbr_avbl[3] & 0xF0;
+    u4_nC = 0;
+    if(u1_mb_a) u4_nC += pu1_left_nnz[1];
+    if(u1_mb_b) u4_nC += pu1_top_nnz[1];
+    if(u1_mb_a && u1_mb_b) u4_nC = (u4_nC + 1) >> 1;
+    pu1_left_nnz[1] = pu1_top_nnz[1] = pu1_nnz[3];
+    error_status =
+        isvce_write_coeff4x4_cavlc(pi2_res_block[3], pu1_nnz[3], u4_block_type, pu1_zero_run, u4_nC,
+                                   ps_bitstream, pu2_sig_coeff_map[3]);
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function encodes luma and chroma residues of a macro block when
+*  the entropy coding mode chosen is cavlc.
+*
+* @description
+*  Left and top macroblock availability is derived from the slice index map: a
+*  neighbour counts as available only when it lies inside the picture and
+*  carries the same slice index as the current mb. Residual data is then
+*  consumed from ps_ent_ctxt->pv_mb_coeff_data in this order: the luma DC
+*  block (I16x16 only), one group of four 4x4 blocks for every set bit of the
+*  luma cbp, the two chroma DC blocks (chroma cbp & 3) and two groups of four
+*  chroma AC blocks (chroma cbp & 2). For every group that is not coded the
+*  matching top and left nnz entries are reset to zero.
+*
+* @param[in] ps_ent_ctxt
+*  Pointer to entropy context. pv_mb_coeff_data is advanced past the data
+*  consumed for this mb; the luma and chroma top/left nnz stores are updated
+*
+* @param[in] u4_mb_type
+*  current mb type. Only I16x16 is treated specially here: a luma DC block is
+*  coded first and the luma 4x4 blocks are coded as CAVLC_LUMA_4x4_AC
+*
+* @param[in] u4_cbp
+*  coded block pattern for the current mb (bits 0-3 luma, remaining bits
+*  chroma)
+*
+* @returns error code of the last coefficient block written
+*
+* @remarks
+*  NOTE(review): error_status is overwritten by every block that is written,
+*  so an error reported for an earlier block is not propagated - confirm.
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T isvce_encode_residue(isvce_entropy_ctxt_t *ps_ent_ctxt, UWORD32 u4_mb_type,
+                                           UWORD32 u4_cbp)
+{
+    /* error status */
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+    /* packed residue */
+    void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data;
+
+    /* bit stream buffer */
+    bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+    /* zero run */
+    UWORD8 *pu1_zero_run = ps_ent_ctxt->au1_zero_run;
+
+    /* temp var */
+    UWORD32 u4_nC, u4_ngbr_avlb;
+    UWORD8 au1_nnz[4], *pu1_ngbr_avlb, *pu1_top_nnz, *pu1_left_nnz;
+    UWORD16 au2_sig_coeff_map[4] = {0};
+    WORD16 *pi2_res_block[4] = {NULL};
+    UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx;
+    tu_sblk_coeff_data_t *ps_mb_coeff_data;
+    ENTROPY_BLK_TYPE e_entropy_blk_type = CAVLC_LUMA_4x4;
+
+    /* ngbr availability */
+    UWORD8 u1_mb_a, u1_mb_b;
+
+    /* cbp */
+    UWORD32 u4_cbp_luma = u4_cbp & 0xF, u4_cbp_chroma = u4_cbp >> 4;
+
+    /* mb indices */
+    WORD32 i4_mb_x, i4_mb_y;
+
+    /* derive neighbor availability */
+    i4_mb_x = ps_ent_ctxt->i4_mb_x;
+    i4_mb_y = ps_ent_ctxt->i4_mb_y;
+    pu1_slice_idx += (i4_mb_y * ps_ent_ctxt->i4_wd_mbs); /* start of the current mb row */
+    /* left macroblock availability */
+    u1_mb_a = (i4_mb_x == 0 || (pu1_slice_idx[i4_mb_x - 1] != pu1_slice_idx[i4_mb_x])) ? 0 : 1;
+    /* top macroblock availability */
+    u1_mb_b = (i4_mb_y == 0 ||
+               (pu1_slice_idx[i4_mb_x - ps_ent_ctxt->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))
+                  ? 0
+                  : 1;
+
+    pu1_ngbr_avlb = (void *) (&u4_ngbr_avlb); /* byte view used to fill one byte per 4x4 */
+    pu1_top_nnz = ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x];
+    pu1_left_nnz = (UWORD8 *) &ps_ent_ctxt->u4_left_nnz_luma;
+
+    /* encode luma residue */
+
+    /* mb type intra 16x16 */
+    if(u4_mb_type == I16x16)
+    {
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0],
+                                   au2_sig_coeff_map[0], pi2_res_block[0]);
+        /* estimate nnz for the current mb */
+        u4_nC = 0;
+        if(u1_mb_a) u4_nC += pu1_left_nnz[0];
+        if(u1_mb_b) u4_nC += pu1_top_nnz[0];
+        if(u1_mb_a && u1_mb_b) u4_nC = (u4_nC + 1) >> 1;
+
+        /* encode dc block */
+        ENTROPY_TRACE("Luma DC blk idx %d", 0);
+        error_status =
+            isvce_write_coeff4x4_cavlc(pi2_res_block[0], au1_nnz[0], CAVLC_LUMA_4x4_DC,
+                                       pu1_zero_run, u4_nC, ps_bitstream, au2_sig_coeff_map[0]);
+
+        e_entropy_blk_type = CAVLC_LUMA_4x4_AC;
+    }
+
+    if(u4_cbp_luma & 1)
+    {
+        /* encode ac block index 8x8 = 0*/
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0],
+                                   au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1],
+                                   au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2],
+                                   au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3],
+                                   au2_sig_coeff_map[3], pi2_res_block[3]);
+        /* derive sub block neighbor availability: high nibble = top, low
+         * nibble = left; neighbours inside the current mb are always available */
+
+        pu1_ngbr_avlb[0] = (u1_mb_b << 4) | (u1_mb_a);
+        pu1_ngbr_avlb[1] = (u1_mb_b << 4) | 1;
+        pu1_ngbr_avlb[2] = (1 << 4) | (u1_mb_a);
+        pu1_ngbr_avlb[3] = 0x11;
+        /* encode sub blk */
+        ENTROPY_TRACE("Luma blk idx %d", 0);
+        error_status =
+            isvce_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map,
+                                       e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+    }
+    else
+    {
+        pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+        pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+    }
+
+    if(u4_cbp_luma & 2)
+    {
+        /* encode ac block index 8x8 = 1*/
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0],
+                                   au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1],
+                                   au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2],
+                                   au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3],
+                                   au2_sig_coeff_map[3], pi2_res_block[3]);
+
+        /* derive sub block neighbor availability */
+        pu1_ngbr_avlb[1] = pu1_ngbr_avlb[0] = (u1_mb_b << 4) | 1;
+        pu1_ngbr_avlb[3] = pu1_ngbr_avlb[2] = 0x11;
+        /* encode sub blk */
+        ENTROPY_TRACE("Luma blk idx %d", 1);
+        error_status = isvce_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz,
+                                                  au2_sig_coeff_map, e_entropy_blk_type,
+                                                  u4_ngbr_avlb, pu1_top_nnz + 2, pu1_left_nnz);
+    }
+    else
+    {
+        (pu1_top_nnz + 2)[0] = (pu1_top_nnz + 2)[1] = 0;
+        pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+    }
+
+    if(u4_cbp_luma & 0x4)
+    {
+        /* encode ac block index 8x8 = 2*/
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0],
+                                   au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1],
+                                   au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2],
+                                   au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3],
+                                   au2_sig_coeff_map[3], pi2_res_block[3]);
+
+        /* derive sub block neighbor availability */
+        pu1_ngbr_avlb[2] = pu1_ngbr_avlb[0] = (1 << 4) | u1_mb_a;
+        pu1_ngbr_avlb[1] = pu1_ngbr_avlb[3] = 0x11;
+        /* encode sub blk */
+        ENTROPY_TRACE("Luma blk idx %d", 2);
+        error_status = isvce_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz,
+                                                  au2_sig_coeff_map, e_entropy_blk_type,
+                                                  u4_ngbr_avlb, pu1_top_nnz, (pu1_left_nnz + 2));
+    }
+    else
+    {
+        pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+        (pu1_left_nnz + 2)[0] = (pu1_left_nnz + 2)[1] = 0;
+    }
+
+    if(u4_cbp_luma & 0x8)
+    {
+        /* encode ac block index 8x8 = 3*/
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0],
+                                   au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1],
+                                   au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2],
+                                   au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3],
+                                   au2_sig_coeff_map[3], pi2_res_block[3]);
+
+        /* derive sub block neighbor availability: every neighbour lies inside
+         * the current mb */
+        u4_ngbr_avlb = 0x11111111;
+        /* encode sub blk */
+        ENTROPY_TRACE("Luma blk idx %d", 3);
+        error_status = isvce_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz,
+                                                  au2_sig_coeff_map, e_entropy_blk_type,
+                                                  u4_ngbr_avlb, pu1_top_nnz + 2, pu1_left_nnz + 2);
+    }
+    else
+    {
+        (pu1_top_nnz + 2)[0] = (pu1_top_nnz + 2)[1] = 0;
+        (pu1_left_nnz + 2)[0] = (pu1_left_nnz + 2)[1] = 0;
+    }
+
+    /* encode chroma residue */
+    if(u4_cbp_chroma & 3)
+    {
+        /* parse packed coeff data structure for residual data */
+        /* cb, cr */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0],
+                                   au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1],
+                                   au2_sig_coeff_map[1], pi2_res_block[1]);
+
+        /* encode dc block */
+        /* cb, cr */
+        ENTROPY_TRACE("Chroma DC blk idx %d", 0);
+        error_status =
+            isvce_write_coeff4x4_cavlc(pi2_res_block[0], au1_nnz[0], CAVLC_CHROMA_4x4_DC,
+                                       pu1_zero_run, 0, ps_bitstream, au2_sig_coeff_map[0]);
+        ENTROPY_TRACE("Chroma DC blk idx %d", 1);
+        error_status =
+            isvce_write_coeff4x4_cavlc(pi2_res_block[1], au1_nnz[1], CAVLC_CHROMA_4x4_DC,
+                                       pu1_zero_run, 0, ps_bitstream, au2_sig_coeff_map[1]);
+    }
+
+    pu1_top_nnz = ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x];
+    pu1_left_nnz = (UWORD8 *) &ps_ent_ctxt->u4_left_nnz_cbcr;
+
+    /* encode sub blk */
+    if(u4_cbp_chroma & 0x2)
+    {
+        /* encode ac block index 8x8 = 0*/
+        /* derive sub block neighbor availability */
+        pu1_ngbr_avlb[0] = (u1_mb_b << 4) | (u1_mb_a);
+        pu1_ngbr_avlb[1] = (u1_mb_b << 4) | 1;
+        pu1_ngbr_avlb[2] = (1 << 4) | (u1_mb_a);
+        pu1_ngbr_avlb[3] = 0x11;
+
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0],
+                                   au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1],
+                                   au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2],
+                                   au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3],
+                                   au2_sig_coeff_map[3], pi2_res_block[3]);
+
+        ENTROPY_TRACE("Chroma AC blk idx %d", 0);
+        error_status = isvce_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz,
+                                                  au2_sig_coeff_map, CAVLC_CHROMA_4x4_AC,
+                                                  u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+    }
+    else
+    {
+        pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+        pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+    }
+
+    pu1_top_nnz += 2;
+    pu1_left_nnz += 2;
+
+    /* encode sub blk: second chroma AC group. u4_ngbr_avlb still holds the
+     * availability bytes written for the first group (same cbp condition) */
+    if(u4_cbp_chroma & 0x2)
+    {
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0],
+                                   au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1],
+                                   au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2],
+                                   au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3],
+                                   au2_sig_coeff_map[3], pi2_res_block[3]);
+
+        ENTROPY_TRACE("Chroma AC blk idx %d", 1);
+        error_status = isvce_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz,
+                                                  au2_sig_coeff_map, CAVLC_CHROMA_4x4_AC,
+                                                  u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+    }
+    else
+    {
+        pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+        pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+    }
+
+    /* store the index of the next mb coeff data */
+    ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data;
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for an Intra Slice.
+*
+* @description
+*  The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes
+*  (if present), mb qp delta, coded block pattern, chroma mb mode and
+*  luma/chroma residue. These syntax elements are written as directed by table
+*  7.3.5 of h264 specification.
+*
+* @param[in] ps_ent_ctxt
+*  pointer to entropy context. pv_mb_header_data is advanced past the header
+*  of the mb just written; u4_header_bits[0] and u4_residue_bits[0] are
+*  incremented by the number of bits produced for header and residue
+*
+* @returns error code (the value returned by isvce_encode_residue() when the
+*  end of the function is reached)
+*
+* @remarks
+*  When base_mode_flag is not signalled (base layer, or adaptive base mode
+*  disabled in the slice header) the u1_base_mode_flag field of the packed mb
+*  header is forced to 0.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_write_islice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt)
+{
+    /* error status */
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+    /* bit stream ptr */
+    bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+    /* packed header data */
+    UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+    isvce_mb_hdr_common_t *ps_mb_hdr = (isvce_mb_hdr_common_t *) ps_ent_ctxt->pv_mb_header_data;
+
+    /* mb header info */
+    /*
+     * mb_tpm : mb type plus mode
+     * mb_type : luma mb type and chroma mb type are packed
+     * cbp : coded block pattern
+     * mb_qp_delta : mb qp delta
+     * chroma_intra_mode : chroma intra mode
+     * luma_intra_mode : luma intra mode
+     */
+    WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+    WORD8 mb_qp_delta;
+
+    /* temp var */
+    WORD32 i, mb_type_stream;
+
+    WORD32 bitstream_start_offset, bitstream_end_offset;
+
+    svc_slice_header_t *ps_svc_slice_header =
+        ps_ent_ctxt->ps_svc_slice_hdr_base +
+        (ps_ent_ctxt->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT);
+
+    /* Starting bitstream offset for header in bits */
+    bitstream_start_offset = isvce_get_num_bits(ps_bitstream);
+
+    /********************************************************************/
+    /*                    BEGIN HEADER GENERATION                       */
+    /********************************************************************/
+
+    if(ps_ent_ctxt->u1_spatial_layer_id && ps_svc_slice_header->i1_adaptive_base_mode_flag)
+    {
+        /* write base_mode_flag */
+        PUT_BITS(ps_bitstream, ps_mb_hdr->u1_base_mode_flag, 1, error_status, "base_mode_flag");
+    }
+    else
+    {
+        ps_mb_hdr->u1_base_mode_flag = 0;
+    }
+
+    /* mb header info */
+    mb_tpm = ps_mb_hdr->u1_mb_type_mode;
+    cbp = ps_mb_hdr->u1_cbp;
+    mb_qp_delta =
+        ((WORD16) ps_mb_hdr->u1_mb_qp) - ((WORD16) ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp);
+
+    /* mb type */
+    mb_type = mb_tpm & 0xF;
+    /* mb type and prediction modes are written only when base_mode_flag is 0 */
+    if(!ps_mb_hdr->u1_base_mode_flag)
+    {
+        if(mb_type == I16x16)
+        {
+            UWORD32 u4_cbp_l, u4_cbp_c;
+
+            u4_cbp_c = (cbp >> 4);
+            u4_cbp_l = (cbp & 0xF);
+            luma_intra_mode = (mb_tpm >> 4) & 3;
+            chroma_intra_mode = (mb_tpm >> 6);
+
+            mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12;
+
+            /* write mb type */
+            PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type");
+
+            /* intra_chroma_pred_mode */
+            PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+
+            pu1_byte += sizeof(isvce_mb_hdr_i16x16_t);
+        }
+        else if(mb_type == I4x4)
+        {
+            isvce_mb_hdr_i4x4_t *ps_mb_hdr_i4x4 =
+                (isvce_mb_hdr_i4x4_t *) ps_ent_ctxt->pv_mb_header_data;
+
+            /* mb sub blk modes */
+            WORD32 intra_pred_mode_flag, rem_intra_mode;
+            WORD32 byte;
+
+            chroma_intra_mode = (mb_tpm >> 6);
+
+            /* write mb type */
+            PUT_BITS_UEV(ps_bitstream, 0, error_status, "mb type");
+
+            for(i = 0; i < 16; i += 2)
+            {
+                /* sub blk idx 1: low nibble of the byte, bit 0 is the
+                 * prev_intra4x4_pred_mode_flag, bits 1-3 the remaining mode */
+                byte = ps_mb_hdr_i4x4->au1_sub_blk_modes[i >> 1];
+
+                intra_pred_mode_flag = byte & 0x1;
+
+                /* prev_intra4x4_pred_mode_flag */
+                PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status,
+                         "prev_intra4x4_pred_mode_flag");
+
+                /* rem_intra4x4_pred_mode */
+                if(!intra_pred_mode_flag)
+                {
+                    rem_intra_mode = (byte & 0xF) >> 1;
+                    PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status,
+                             "rem_intra4x4_pred_mode");
+                }
+
+                /* sub blk idx 2: high nibble of the same byte */
+                byte >>= 4;
+
+                intra_pred_mode_flag = byte & 0x1;
+
+                /* prev_intra4x4_pred_mode_flag */
+                PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status,
+                         "prev_intra4x4_pred_mode_flag");
+
+                /* rem_intra4x4_pred_mode */
+                if(!intra_pred_mode_flag)
+                {
+                    rem_intra_mode = (byte & 0xF) >> 1;
+                    PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status,
+                             "rem_intra4x4_pred_mode");
+                }
+            }
+
+            /* intra_chroma_pred_mode */
+            PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+
+            pu1_byte += sizeof(isvce_mb_hdr_i4x4_t);
+        }
+        else if(mb_type == I8x8)
+        {
+            /* transform 8x8 flag */
+            UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
+            isvce_mb_hdr_i8x8_t *ps_mb_hdr_i8x8 =
+                (isvce_mb_hdr_i8x8_t *) ps_ent_ctxt->pv_mb_header_data;
+
+            /* mb sub blk modes */
+            WORD32 intra_pred_mode_flag, rem_intra_mode;
+            WORD32 byte;
+
+            chroma_intra_mode = (mb_tpm >> 6);
+
+            ASSERT(0); /* this path is not expected to be taken */
+
+            /* write mb type */
+            PUT_BITS_UEV(ps_bitstream, 0, error_status, "mb type");
+
+            /* u4_transform_size_8x8_flag */
+            PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status,
+                     "u4_transform_size_8x8_flag");
+
+            /* write sub block modes
+             * NOTE(review): the loop runs four times, indexes the mode bytes
+             * with i >> 1 and emits both nibbles on every pass, so each byte
+             * is visited twice and eight modes are written. Verify before
+             * removing the ASSERT(0) above. */
+            for(i = 0; i < 4; i++)
+            {
+                /* sub blk idx 1 */
+                byte = ps_mb_hdr_i8x8->au1_sub_blk_modes[i >> 1];
+
+                intra_pred_mode_flag = byte & 0x1;
+
+                /* prev_intra4x4_pred_mode_flag */
+                PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status,
+                         "prev_intra4x4_pred_mode_flag");
+
+                /* rem_intra4x4_pred_mode */
+                if(!intra_pred_mode_flag)
+                {
+                    rem_intra_mode = (byte & 0xF) >> 1;
+                    PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status,
+                             "rem_intra4x4_pred_mode");
+                }
+
+                /* sub blk idx 2 */
+                byte >>= 4;
+
+                intra_pred_mode_flag = byte & 0x1;
+
+                /* prev_intra4x4_pred_mode_flag */
+                PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status,
+                         "prev_intra4x4_pred_mode_flag");
+
+                /* rem_intra4x4_pred_mode */
+                if(!intra_pred_mode_flag)
+                {
+                    rem_intra_mode = (byte & 0xF) >> 1;
+                    PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status,
+                             "rem_intra4x4_pred_mode");
+                }
+            }
+
+            /* intra_chroma_pred_mode */
+            PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+
+            pu1_byte += sizeof(isvce_mb_hdr_i8x8_t);
+        }
+    }
+    else
+    {
+        pu1_byte += sizeof(isvce_mb_hdr_base_mode_t);
+    }
+
+    /* coded_block_pattern: not written for I16x16 without base mode, where
+     * the cbp is already folded into the mb type */
+    if(ps_mb_hdr->u1_base_mode_flag || mb_type != I16x16)
+    {
+        PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][ps_mb_hdr->u1_base_mode_flag],
+                     error_status, "coded_block_pattern");
+
+        if(cbp % 16 > 0 && ps_ent_ctxt->i1_transform_8x8_mode_flag &&
+           (ps_mb_hdr->u1_base_mode_flag || (mb_type == I8x8 || mb_type == I4x4)))
+        {
+            PUT_BITS(ps_bitstream, ps_ent_ctxt->i1_transform_8x8_mode_flag, 1, error_status,
+                     "u4_transform_size_8x8_flag");
+        }
+    }
+
+    if((cbp > 0) || (mb_type == I16x16))
+    {
+        /* mb_qp_delta; the running qp is updated only when a delta is sent */
+        PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta");
+        ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp = ps_mb_hdr->u1_mb_qp;
+    }
+
+    /* Ending bitstream offset for header in bits */
+    bitstream_end_offset = isvce_get_num_bits(ps_bitstream);
+
+    ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset - bitstream_start_offset;
+
+    /* Starting bitstream offset for residue */
+    bitstream_start_offset = bitstream_end_offset;
+
+    /* residual */
+    error_status = isvce_encode_residue(ps_ent_ctxt, mb_type, cbp);
+
+    /* Ending bitstream offset for residue in bits */
+    bitstream_end_offset = isvce_get_num_bits(ps_bitstream);
+    ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset - bitstream_start_offset;
+
+    /* store the index of the next mb syntax layer */
+    ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for Inter slices
+*
+* @description
+*  The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
+*  (if present), mb qp delta, coded block pattern, chroma mb mode and
+*  luma/chroma residue.
These syntax elements are written as directed by table +* 7.3.5 of h264 specification +* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_write_pslice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt) +{ + /* mb header info */ + /* + * mb_tpm : mb type plus mode + * mb_type : luma mb type and chroma mb type are packed + * cbp : coded block pattern + * mb_qp_delta : mb qp delta + * chroma_intra_mode : chroma intra mode + * luma_intra_mode : luma intra mode + * ps_pu : Pointer to the array of structures having motion vectors, size + * and position of sub partitions + */ + WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode; + WORD8 mb_qp_delta; + WORD32 i, mb_type_stream; + WORD32 bitstream_start_offset, bitstream_end_offset; + UWORD8 u1_is_intra_mb; + + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + isvce_mb_hdr_common_t *ps_mb_hdr = (isvce_mb_hdr_common_t *) ps_ent_ctxt->pv_mb_header_data; + svc_slice_header_t *ps_svc_slice_header = + ps_ent_ctxt->ps_svc_slice_hdr_base + + (ps_ent_ctxt->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT); + + IH264E_ERROR_T error_status = IH264E_SUCCESS; + + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + WORD32 cbptable = 1; + WORD32 is_inter = 0; + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = isvce_get_num_bits(ps_bitstream); + + /********************************************************************/ + /* BEGIN HEADER GENERATION */ + /********************************************************************/ + + /* mb header info */ + mb_tpm = ps_mb_hdr->u1_mb_type_mode; + + /* mb type */ + mb_type = mb_tpm & 0xF; + u1_is_intra_mb = (mb_type == I16x16) || (mb_type == I8x8) || (mb_type == I4x4); + + /* check for skip */ + if(mb_type == PSKIP) + { + UWORD32 *nnz; + + is_inter = 1; + + /* increment skip counter */ + 
(*ps_ent_ctxt->pi4_mb_skip_run)++; + + /* store the index of the next mb syntax layer */ + pu1_byte += sizeof(isvce_mb_hdr_pskip_t); + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + /* set nnz to zero */ + ps_ent_ctxt->u4_left_nnz_luma = 0; + nnz = (UWORD32 *) ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x]; + *nnz = 0; + ps_ent_ctxt->u4_left_nnz_cbcr = 0; + nnz = (UWORD32 *) ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x]; + *nnz = 0; + + /* residual */ + error_status = isvce_encode_residue(ps_ent_ctxt, P16x16, 0); + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + + ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset; + + return error_status; + } + + /* remaining mb header info */ + cbp = ps_mb_hdr->u1_cbp; + mb_qp_delta = + ((WORD16) ps_mb_hdr->u1_mb_qp) - ((WORD16) ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp); + + /* mb skip run */ + PUT_BITS_UEV(ps_bitstream, *ps_ent_ctxt->pi4_mb_skip_run, error_status, "mb skip run"); + + /* reset skip counter */ + *ps_ent_ctxt->pi4_mb_skip_run = 0; + + if(ps_ent_ctxt->u1_spatial_layer_id && ps_svc_slice_header->i1_adaptive_base_mode_flag) + { + /* write base_mode_flag */ + PUT_BITS(ps_bitstream, ps_mb_hdr->u1_base_mode_flag, 1, error_status, "base_mode_flag"); + } + else + { + ps_mb_hdr->u1_base_mode_flag = 0; + } + + if(!ps_mb_hdr->u1_base_mode_flag) + { + /* is intra ? 
*/ + if(mb_type == I16x16) + { + UWORD32 u4_cbp_l, u4_cbp_c; + + is_inter = 0; + + u4_cbp_c = (cbp >> 4); + u4_cbp_l = (cbp & 0xF); + luma_intra_mode = (mb_tpm >> 4) & 3; + chroma_intra_mode = (mb_tpm >> 6); + + mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12; + + mb_type_stream += 5; + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type"); + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + pu1_byte += sizeof(isvce_mb_hdr_i16x16_t); + } + else if(mb_type == I4x4) + { + isvce_mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = + (isvce_mb_hdr_i4x4_t *) ps_ent_ctxt->pv_mb_header_data; + + /* mb sub blk modes */ + WORD32 intra_pred_mode_flag, rem_intra_mode; + WORD32 byte; + + is_inter = 0; + + chroma_intra_mode = (mb_tpm >> 6); + cbptable = 0; + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, 5, error_status, "mb type"); + + for(i = 0; i < 16; i += 2) + { + /* sub blk idx 1 */ + byte = ps_mb_hdr_i4x4->au1_sub_blk_modes[i >> 1]; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, + "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if(!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, + "rem_intra4x4_pred_mode"); + } + + /* sub blk idx 2 */ + byte >>= 4; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, + "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if(!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, + "rem_intra4x4_pred_mode"); + } + } + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + + pu1_byte += 
sizeof(isvce_mb_hdr_i4x4_t); + } + else if(mb_type == I8x8) + { + isvce_mb_hdr_i8x8_t *ps_mb_hdr_i8x8 = + (isvce_mb_hdr_i8x8_t *) ps_ent_ctxt->pv_mb_header_data; + + /* transform 8x8 flag */ + UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag; + + /* mb sub blk modes */ + WORD32 intra_pred_mode_flag, rem_intra_mode; + WORD32 byte; + + is_inter = 0; + + chroma_intra_mode = (mb_tpm >> 6); + cbptable = 0; + + ASSERT(0); + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, 5, error_status, "mb type"); + + /* u4_transform_size_8x8_flag */ + PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, + "u4_transform_size_8x8_flag"); + + /* write sub block modes */ + for(i = 0; i < 4; i++) + { + /* sub blk idx 1 */ + byte = ps_mb_hdr_i8x8->au1_sub_blk_modes[i >> 1]; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, + "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if(!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, + "rem_intra4x4_pred_mode"); + } + + /* sub blk idx 2 */ + byte >>= 4; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, + "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if(!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, + "rem_intra4x4_pred_mode"); + } + } + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + + pu1_byte += sizeof(isvce_mb_hdr_i8x8_t); + } + else + { + isvce_mb_hdr_p16x16_t *ps_mb_hdr_p16x16 = + (isvce_mb_hdr_p16x16_t *) ps_ent_ctxt->pv_mb_header_data; + + /* inter macro block partition cnt */ + const UWORD8 au1_part_cnt[] = {1, 2, 2, 4}; + + /* mv ptr */ + WORD16 
*pi2_mv_ptr = (WORD16 *) ps_mb_hdr_p16x16->ai2_mvd; + + /* number of partitions for the current mb */ + UWORD32 u4_part_cnt = au1_part_cnt[mb_type - 3]; + + is_inter = 1; + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, mb_type - 3, error_status, "mb type"); + + for(i = 0; i < (WORD32) u4_part_cnt; i++) + { + if(ps_ent_ctxt->u1_spatial_layer_id && + ps_svc_slice_header->i1_adaptive_motion_prediction_flag) + { + PUT_BITS(ps_bitstream, ps_mb_hdr_p16x16->u1_mvp_idx, 1, error_status, + "motion_prediction_flag_l0"); + } + } + + for(i = 0; i < (WORD32) u4_part_cnt; i++) + { + PUT_BITS_SEV(ps_bitstream, pi2_mv_ptr[i], error_status, "mv x"); + PUT_BITS_SEV(ps_bitstream, pi2_mv_ptr[i + 1], error_status, "mv y"); + } + + pu1_byte += sizeof(isvce_mb_hdr_p16x16_t); + } + } + else + { + pu1_byte += sizeof(isvce_mb_hdr_base_mode_t); + } + + if(ps_ent_ctxt->u1_spatial_layer_id && + ps_svc_slice_header->i1_adaptive_residual_prediction_flag && + !ps_ent_ctxt + ->ps_svc_nalu_ext_base[1 + (ps_ent_ctxt->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT)] + .u1_idr_flag && + (ps_mb_hdr->u1_base_mode_flag || !u1_is_intra_mb)) + { + PUT_BITS(ps_bitstream, ps_mb_hdr->u1_residual_prediction_flag, 1, error_status, + "residual_prediction_flag"); + } + + /* coded_block_pattern */ + if(ps_mb_hdr->u1_base_mode_flag || (mb_type != I16x16)) + { + PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][cbptable], error_status, + "coded_block_pattern"); + } + + if((cbp > 0) || (mb_type == I16x16)) + { + /* mb_qp_delta */ + PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta"); + ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp = ps_mb_hdr->u1_mb_qp; + } + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + + ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset; + + /* start bitstream offset for residue in bits */ + bitstream_start_offset = bitstream_end_offset; + + /* residual */ + error_status = 
isvce_encode_residue(ps_ent_ctxt, mb_type, cbp); + + /* Ending bitstream offset for residue in bits */ + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + + ps_ent_ctxt->u4_residue_bits[is_inter] += bitstream_end_offset - bitstream_start_offset; + + /* store the index of the next mb syntax layer */ + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + return error_status; +} + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for B slices +* +* @description +* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes +* (if present), mb qp delta, coded block pattern, chroma mb mode and +* luma/chroma residue. These syntax elements are written as directed by table +* 7.3.5 of h264 specification +* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_write_bslice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt) +{ + /* mb header info */ + /* + * mb_tpm : mb type plus mode + * mb_type : luma mb type and chroma mb type are packed + * cbp : coded block pattern + * mb_qp_delta : mb qp delta + * chroma_intra_mode : chroma intra mode + * luma_intra_mode : luma intra mode + * ps_pu : Pointer to the array of structures having motion vectors, size + * and position of sub partitions + */ + WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode; + WORD8 mb_qp_delta; + WORD32 i, j; + WORD32 mb_type_stream; + WORD32 bitstream_start_offset, bitstream_end_offset; + UWORD8 u1_is_intra_mb; + + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + isvce_mb_hdr_common_t *ps_mb_hdr = (isvce_mb_hdr_common_t *) ps_ent_ctxt->pv_mb_header_data; + svc_slice_header_t *ps_svc_slice_header = + ps_ent_ctxt->ps_svc_slice_hdr_base + + (ps_ent_ctxt->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT); + + IH264E_ERROR_T 
error_status = IH264E_SUCCESS; + + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + WORD32 cbptable = 1; + WORD32 is_inter = 0; + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = isvce_get_num_bits(ps_bitstream); + + /********************************************************************/ + /* BEGIN HEADER GENERATION */ + /********************************************************************/ + + mb_tpm = ps_mb_hdr->u1_mb_type_mode; + + /* mb type */ + mb_type = mb_tpm & 0xF; + u1_is_intra_mb = (mb_type == I16x16) || (mb_type == I8x8) || (mb_type == I4x4); + + /* check for skip */ + if(mb_type == BSKIP) + { + UWORD32 *nnz; + + is_inter = 1; + + /* increment skip counter */ + (*ps_ent_ctxt->pi4_mb_skip_run)++; + + /* store the index of the next mb syntax layer */ + pu1_byte += sizeof(isvce_mb_hdr_bskip_t); + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + /* set nnz to zero */ + ps_ent_ctxt->u4_left_nnz_luma = 0; + nnz = (UWORD32 *) ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x]; + *nnz = 0; + ps_ent_ctxt->u4_left_nnz_cbcr = 0; + nnz = (UWORD32 *) ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x]; + *nnz = 0; + + /* residual */ + error_status = isvce_encode_residue(ps_ent_ctxt, B16x16, 0); + + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + + ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset; + + return error_status; + } + + /* remaining mb header info */ + cbp = ps_mb_hdr->u1_cbp; + mb_qp_delta = + ((WORD16) ps_mb_hdr->u1_mb_qp) - ((WORD16) ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp); + + /* mb skip run */ + PUT_BITS_UEV(ps_bitstream, *ps_ent_ctxt->pi4_mb_skip_run, error_status, "mb skip run"); + + /* reset skip counter */ + *ps_ent_ctxt->pi4_mb_skip_run = 0; + + if(ps_ent_ctxt->u1_spatial_layer_id && ps_svc_slice_header->i1_adaptive_base_mode_flag) + { + /* write base_mode_flag */ + PUT_BITS(ps_bitstream, ps_mb_hdr->u1_base_mode_flag, 1, error_status, "base_mode_flag"); + } + else 
+ { + ps_mb_hdr->u1_base_mode_flag = 0; + } + + if(!ps_mb_hdr->u1_base_mode_flag) + { + /* is intra ? */ + if(mb_type == I16x16) + { + UWORD32 u4_cbp_l, u4_cbp_c; + + is_inter = 0; + + u4_cbp_c = (cbp >> 4); + u4_cbp_l = (cbp & 0xF); + luma_intra_mode = (mb_tpm >> 4) & 3; + chroma_intra_mode = (mb_tpm >> 6); + + mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12; + + mb_type_stream += 23; + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type"); + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + pu1_byte += sizeof(isvce_mb_hdr_i16x16_t); + } + else if(mb_type == I4x4) + { + isvce_mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = + (isvce_mb_hdr_i4x4_t *) ps_ent_ctxt->pv_mb_header_data; + + /* mb sub blk modes */ + WORD32 intra_pred_mode_flag, rem_intra_mode; + WORD32 byte; + + is_inter = 0; + + chroma_intra_mode = (mb_tpm >> 6); + cbptable = 0; + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, 23, error_status, "mb type"); + + for(i = 0; i < 16; i += 2) + { + /* sub blk idx 1 */ + byte = ps_mb_hdr_i4x4->au1_sub_blk_modes[i >> 1]; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, + "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if(!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, + "rem_intra4x4_pred_mode"); + } + + /* sub blk idx 2 */ + byte >>= 4; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, + "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if(!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, + "rem_intra4x4_pred_mode"); + } + } + + /* intra_chroma_pred_mode */ + 
PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + pu1_byte += sizeof(isvce_mb_hdr_i4x4_t); + } + else if(mb_type == I8x8) + { + isvce_mb_hdr_i8x8_t *ps_mb_hdr_i8x8 = + (isvce_mb_hdr_i8x8_t *) ps_ent_ctxt->pv_mb_header_data; + + /* transform 8x8 flag */ + UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag; + + /* mb sub blk modes */ + WORD32 intra_pred_mode_flag, rem_intra_mode; + WORD32 byte; + + is_inter = 0; + + chroma_intra_mode = (mb_tpm >> 6); + cbptable = 0; + + ASSERT(0); + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, 23, error_status, "mb type"); + + /* u4_transform_size_8x8_flag */ + PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, + "u4_transform_size_8x8_flag"); + + /* write sub block modes */ + for(i = 0; i < 4; i++) + { + /* sub blk idx 1 */ + byte = ps_mb_hdr_i8x8->au1_sub_blk_modes[i >> 1]; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, + "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if(!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, + "rem_intra4x4_pred_mode"); + } + + /* sub blk idx 2 */ + byte >>= 4; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, + "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if(!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, + "rem_intra4x4_pred_mode"); + } + } + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + pu1_byte += sizeof(isvce_mb_hdr_i8x8_t); + } + else if(mb_type == BDIRECT) + { + is_inter = 1; + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, B_DIRECT_16x16, error_status, "mb 
type"); + pu1_byte += sizeof(isvce_mb_hdr_bdirect_t); + } + else + { + isvce_mb_hdr_b16x16_t *ps_mb_hdr_b16x16 = + (isvce_mb_hdr_b16x16_t *) ps_ent_ctxt->pv_mb_header_data; + + /* inter macro block partition cnt for 16x16 16x8 8x16 8x8 */ + const UWORD8 au1_part_cnt[] = {1, 2, 2, 4}; + + /* number of partitions for the current mb */ + UWORD32 u4_part_cnt = au1_part_cnt[mb_type - B16x16]; + + /* Get the pred modes */ + WORD32 i4_mb_part_pred_mode = (mb_tpm >> 4); + + ASSERT(mb_type == B16x16); + + is_inter = 1; + + mb_type_stream = mb_type - B16x16 + B_L0_16x16 + i4_mb_part_pred_mode; + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type"); + + for(i = 0; i < (WORD32) u4_part_cnt; i++) + { + for(j = 0; j < NUM_PRED_DIRS; j++) + { + PRED_MODE_T e_pred_mode = (PRED_MODE_T) j; + PRED_MODE_T e_cmpl_pred_mode = (e_pred_mode == L0) ? L1 : L0; + + if(((PRED_MODE_T) i4_mb_part_pred_mode) != e_pred_mode) + { + if(ps_svc_slice_header->i1_adaptive_motion_prediction_flag && + ps_ent_ctxt->u1_spatial_layer_id) + { + PUT_BITS(ps_bitstream, ps_mb_hdr_b16x16->au1_mvp_idx[e_cmpl_pred_mode], + 1, error_status, "motion_prediction_flag_l0"); + } + } + } + } + + for(i = 0; i < (WORD32) u4_part_cnt; i++) + { + if(i4_mb_part_pred_mode != L1) + { + PUT_BITS_SEV(ps_bitstream, ps_mb_hdr_b16x16->ai2_mvd[0][0], error_status, + "mv l0 x"); + PUT_BITS_SEV(ps_bitstream, ps_mb_hdr_b16x16->ai2_mvd[0][1], error_status, + "mv l0 y"); + } + if(i4_mb_part_pred_mode != L0) + { + PUT_BITS_SEV(ps_bitstream, ps_mb_hdr_b16x16->ai2_mvd[1][0], error_status, + "mv l1 x"); + PUT_BITS_SEV(ps_bitstream, ps_mb_hdr_b16x16->ai2_mvd[1][1], error_status, + "mv l1 y"); + } + } + + pu1_byte += sizeof(isvce_mb_hdr_b16x16_t); + } + } + + if(ps_svc_slice_header->i1_adaptive_residual_prediction_flag && + ps_ent_ctxt->u1_spatial_layer_id && (ps_mb_hdr->u1_base_mode_flag || !u1_is_intra_mb)) + { + PUT_BITS(ps_bitstream, ps_mb_hdr->u1_residual_prediction_flag, 1, error_status, + 
"residual_prediction_flag"); + } + + /* coded_block_pattern */ + if(ps_mb_hdr->u1_base_mode_flag || mb_type != I16x16) + { + PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][cbptable], error_status, + "coded_block_pattern"); + } + + if((cbp > 0) || (mb_type == I16x16)) + { + /* mb_qp_delta */ + PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta"); + ps_ent_ctxt->ps_mb_qp_ctxt->u1_cur_mb_qp = ps_mb_hdr->u1_mb_qp; + } + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + + ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset; + + /* start bitstream offset for residue in bits */ + bitstream_start_offset = bitstream_end_offset; + + /* residual */ + error_status = isvce_encode_residue(ps_ent_ctxt, mb_type, cbp); + + /* Ending bitstream offset for residue in bits */ + bitstream_end_offset = isvce_get_num_bits(ps_bitstream); + + ps_ent_ctxt->u4_residue_bits[is_inter] += bitstream_end_offset - bitstream_start_offset; + + /* store the index of the next mb syntax layer */ + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + return error_status; +} + +#if ENABLE_RE_ENC_AS_SKIP +/** +****************************************************************************** +* +* @brief re-encode frame as all skip MBs +* +* @par Description +* The frame is encoded as all skip MBs to comply with VBV restrictions +* +* @param[in] ps_entropy +* pointer to entropy context (handle) +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T isvce_reencode_as_skip_frame_cavlc(isvce_entropy_ctxt_t *ps_entropy) +{ + WORD32 i4_mb_skip_run; + + bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; + bitstrm_t *ps_bitstrm_after_slice_hdr = ps_entropy->ps_bitstrm_after_slice_hdr; + + ps_bitstrm->i4_bits_left_in_cw = ps_bitstrm_after_slice_hdr->i4_bits_left_in_cw; + ps_bitstrm->u4_cur_word = 
ps_bitstrm_after_slice_hdr->u4_cur_word; + ps_bitstrm->u4_strm_buf_offset = ps_bitstrm_after_slice_hdr->u4_strm_buf_offset; + ps_bitstrm->i4_zero_bytes_run = ps_bitstrm_after_slice_hdr->i4_zero_bytes_run; + + /* mb skip run: a single run covering all i4_wd_mbs * i4_ht_mbs macroblocks, written from the bitstream state copied from ps_bitstrm_after_slice_hdr */ + i4_mb_skip_run = ps_entropy->i4_wd_mbs * ps_entropy->i4_ht_mbs; + PUT_BITS_UEV(ps_bitstrm, i4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run"); + + /* put rbsp trailing bits; the status of this call is what the function returns */ + ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm); + + return ps_entropy->i4_error_code; +} +#endif diff --git a/encoder/svc/isvce_cavlc.h b/encoder/svc/isvce_cavlc.h new file mode 100644 index 0000000..4a1952c --- /dev/null +++ b/encoder/svc/isvce_cavlc.h @@ -0,0 +1,126 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file +* isvce_cavlc.h +* +* @brief +* This file contains enumerations, macros and extern declarations of H264 +* cavlc tables +* +* @author +* ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef _ISVCE_CAVLC_H_ +#define _ISVCE_CAVLC_H_ + +#include "ih264_typedefs.h" +#include "isvce_defs.h" +#include "isvce_structs.h" + +/*****************************************************************************/ +/* Function macro definitions */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for an Intra Slice. +* +* @description +* The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes +* (if present), mb qp delta, coded block pattern, chroma mb mode and +* luma/chroma residue. These syntax elements are written as directed by table +* 7.3.5 of h264 specification. +* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_write_islice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt); + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for Inter slices +* +* @description +* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes +* (if present), mb qp delta, coded block pattern, chroma mb mode and +* luma/chroma residue. 
These syntax elements are written as directed by table +* 7.3.5 of h264 specification +* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_write_pslice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt); + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for Inter(B) slices +* +* @description +* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes +* (if present), mb qp delta, coded block pattern, chroma mb mode and +* luma/chroma residue. These syntax elements are written as directed by table +* 7.3.5 of h264 specification +* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_write_bslice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt); + +#if ENABLE_RE_ENC_AS_SKIP +IH264E_ERROR_T isvce_reencode_as_skip_frame_cavlc(isvce_entropy_ctxt_t *ps_entropy); +#endif + +#endif diff --git a/encoder/svc/isvce_core_coding.c b/encoder/svc/isvce_core_coding.c new file mode 100644 index 0000000..ecb453f --- /dev/null +++ b/encoder/svc/isvce_core_coding.c @@ -0,0 +1,2367 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** + ******************************************************************************* + * @file + * isvce_core_coding.c + * + * @brief + * This file contains routines that perform luma and chroma core coding for + * intra macroblocks + * + * @author + * ittiam + * + * @par List of Functions: + * - isvce_pack_l_mb_i16() + * - isvce_pack_c_mb_i8() + * - isvce_code_luma_intra_macroblock_16x16() + * - isvce_code_luma_intra_macroblock_4x4() + * - isvce_code_chroma_intra_macroblock_8x8() + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "ih264_platform_macros.h" +#include "iv2.h" +#include "ive2.h" +#include "isvc_macros.h" +#include "isvc_defs.h" +#include "ih264e_config.h" +#include "isvce_defs.h" +#include "ih264_trans_data.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include 
"isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "isvce_globals.h" +#include "isvce_core_coding.h" +#include "isvce_mc.h" +#include "isvce_ibl_eval.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* This function performs does the DCT transform then Hadamard transform +* and quantization for a macroblock when the mb mode is intra 16x16 mode +* +* @par Description: +* First cf4 is done on all 16 4x4 blocks of the 16x16 input block. +* Then hadamard transform is done on the DC coefficients +* Quantization is then performed on the 16x16 block, 4x4 wise +* +* @param[in] pu1_src +* Pointer to source sub-block +* +* @param[in] pu1_pred +* Pointer to prediction sub-block +* +* @param[in] pi2_out +* Pointer to residual sub-block +* The output will be in linear format +* The first 16 continuous locations will contain the values of Dc block +* After DC block and a stride 1st AC block will follow +* After one more stride next AC block will follow +* The blocks will be in raster scan order +* +* @param[in] i4_src_stride +* Source stride +* +* @param[in] i4_pred_stride +* Prediction stride +* +* @param[in] dst_strd +* Destination stride +* +* @param[in] pu2_scale_matrix +* The quantization matrix for 4x4 transform +* +* @param[in] pu2_threshold_matrix +* Threshold matrix +* +* @param[in] u4_qbits +* 15+QP/6 +* +* @param[in] u4_round_factor +* Round factor for quant +* +* @param[out] pu1_nnz +* Memory to store the non-zeros after transform +* The first byte 
will be the nnz of DC block +* From the next byte the AC nnzs will be stored in raster scan order +* +* @param u4_dc_flag +* Signals if Dc transform is to be done or not +* 1 -> Dc transform will be done +* 0 -> Dc transform will not be done +* +* @remarks +* +******************************************************************************* +*/ +void isvce_luma_16x16_resi_trans_dctrans_quant( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_quant_coeffs, + buffer_container_t *ps_upsampled_res, isa_dependent_fxns_t *ps_isa_dependent_fxns, + const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, UWORD8 *pu1_nnz, + UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD32 u4_dc_flag, UWORD8 u1_use_upsampled_res) +{ + WORD32 blk_cntr; + WORD32 i4_offsetx, i4_offsety; + + enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns; + buffer_container_t s_src = ps_src[0]; + buffer_container_t s_pred = ps_pred[0]; + buffer_container_t s_quant_coeffs = ps_quant_coeffs[0]; + buffer_container_t s_upsampled_res = {0}; + resi_trans_quant_constants_t s_resi_trans_quant_constants = { + .pu2_scale_matrix = pu2_scale_matrix, + .pu2_threshold_matrix = pu2_threshold_matrix, + .u4_qbits = u4_qbits, + .u4_round_factor = u4_round_factor}; + + UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(u1_use_upsampled_res); + + /* Move to the ac addresses */ + pu1_nnz++; + s_quant_coeffs.pv_data = ((WORD16 *) s_quant_coeffs.pv_data) + s_quant_coeffs.i4_data_stride; + + if(u1_use_upsampled_res) + { + s_upsampled_res = ps_upsampled_res[0]; + } + + for(blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++) + { + IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety); + + s_src.pv_data = + ((UWORD8 *) ps_src[0].pv_data) + i4_offsetx + i4_offsety * ps_src[0].i4_data_stride; + s_pred.pv_data = + ((UWORD8 *) ps_pred[0].pv_data) + i4_offsetx + i4_offsety * ps_pred[0].i4_data_stride; + s_quant_coeffs.pv_data = + ((WORD16 *) 
ps_quant_coeffs[0].pv_data) + blk_cntr * ps_quant_coeffs[0].i4_data_stride; + + if(u1_use_upsampled_res) + { + s_upsampled_res.pv_data = ((WORD16 *) ps_upsampled_res[0].pv_data) + i4_offsetx + + i4_offsety * ps_upsampled_res[0].i4_data_stride; + } + + /* Move to the ac addresses */ + s_quant_coeffs.pv_data = + ((WORD16 *) s_quant_coeffs.pv_data) + ps_quant_coeffs[0].i4_data_stride; + + s_quant_coeffs.i4_data_stride = 4; + + ps_enc_loop_fxns->apf_resi_trans_quant_4x4[u1_resi_trans_fxn_idx]( + &s_src, &s_pred, &s_quant_coeffs, &s_upsampled_res, &s_resi_trans_quant_constants, + &pu1_nnz[blk_cntr], ((WORD16 *) ps_quant_coeffs->pv_data) + blk_cntr, + u1_use_upsampled_res); + } + + if(!u4_dc_flag) + { + return; + } + + /* + * In case of i16x16, we need to remove the contribution of dc coeffs into + * nnz of each block. We are doing that in the packing function + */ + + /* Adjust pointers to point to dc values */ + s_quant_coeffs = ps_quant_coeffs[0]; + pu1_nnz--; + + u4_qbits++; + u4_round_factor <<= 1; + + ps_enc_loop_fxns->pf_hadamard_quant_4x4(((WORD16 *) s_quant_coeffs.pv_data), + ((WORD16 *) s_quant_coeffs.pv_data), + &s_resi_trans_quant_constants, &pu1_nnz[0]); +} + +/** +******************************************************************************* +* +* @brief +* This function performs the intra 16x16 inverse transform process for H264 +* it includes inverse Dc transform, inverse quant and then inverse transform +* +* @par Description: +* +* @param[in] pi2_src +* Input data, 16x16 size +* First 16 mem locations will have the Dc coffs in rater scan order in linear +*fashion after a stride 1st AC clock will be present again in raster can order +* Then each AC block of the 16x16 block will follow in raster scan order +* +* @param[in] pu1_pred +* The predicted data, 16x16 size +* Block by block form +* +* @param[in] pu1_out +* Output 16x16 +* In block by block form +* +* @param[in] i4_src_stride +* Source stride +* +* @param[in] i4_pred_stride +* input stride for 
prediction buffer
+*
+* @param[in] i4_out_stride
+* input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+* Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+* weight matrix of 4x4 transform
+*
+* @param[in] u4_qp_div_6
+* QP/6
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least 20 in size
+*
+* @param[in] pu4_cntrl
+* Controls the transform path
+* total Last 17 bits are used
+* the 16th th bit will correspond to DC block
+* and 32-17 will correspond to the ac blocks in raster scan order
+* bit equaling zero indicates that the entire 4x4 block is zero for DC
+* For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block
+*is nonzero
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
+*
+* @returns
+* none
+*
+* @remarks
+* The all zero case must be taken care outside
+*
+*******************************************************************************
+*/
+void isvce_luma_16x16_idctrans_iquant_itrans_recon(
+    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_recon,
+    buffer_container_t *ps_res, buffer_container_t *ps_res_pred,
+    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+    isa_dependent_fxns_t *ps_isa_dependent_fxns, WORD32 *pi4_tmp, UWORD32 u4_cntrl,
+    UWORD32 u4_dc_trans_flag, UWORD8 u1_res_accumulate)
+{
+    /*
+     * Overview: reconstructs a 16x16 luma MB one 4x4 block at a time.
+     * u4_cntrl is split into three work lists (DC-only, AC/mixed, empty) and
+     * each list is drained with DEQUEUE_BLKID_FROM_CONTROL, calling the
+     * matching kernel from ps_isa_dependent_fxns->s_enc_loop_fxns.
+     * The pixel/residual containers (pred, recon, res, res_pred) are addressed
+     * per block through local copies so the caller's containers are not
+     * modified by this function itself.
+     */
+
+    /* Cntrl bits for 4x4 transforms
+     * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
+     * u4_dc_cntrl : controls if a 4x4 block is to be processed in dc path
+     *             : dc block must contain only single dc coefficient
+     * u4_empty_blk_cntrl : control for 4x4 block with no coeffs, ie no dc and ac
+     *                    : ie not (ac or dc)
+     */
+    UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
+    UWORD32 u4_blk_id;
+    WORD32 i4_offset_x, i4_offset_y;
+    UWORD32 u4_dc_inc;
+    WORD16 *pi2_dc_src;
+
+    enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
+    /* Per-block working copies; only pv_data / i4_data_stride are rewritten below */
+    buffer_container_t s_src = ps_src[0];
+    buffer_container_t s_pred = ps_pred[0];
+    buffer_container_t s_recon = ps_recon[0];
+    buffer_container_t s_res = ps_res[0];
+    buffer_container_t s_res_pred = ps_res_pred[0];
+
+    /* Start index for inverse quant in a 4x4 block:
+     * 1 when the separate DC (hadamard) transform is in use, 0 otherwise */
+    WORD32 i4_iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1;
+    const UWORD16 *pu2_iscale_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+    /* Index into the kernel tables, chosen from the DC-transform and
+     * residual-accumulate flags */
+    UWORD8 u1_iq_it_recon_fxn_idx =
+        isvc_get_iq_it_recon_variant_idx(!!u4_dc_trans_flag, u1_res_accumulate);
+
+    /*
+     * For intra blocks we need to do inverse dc transform
+     * In case of intra blocks, it is here that we populate the dc bits in cntrl
+     * as they cannot be populated any earlier
+     */
+    if(u4_dc_trans_flag)
+    {
+        /* NOTE(review): this u4_dc_cntrl shadows the function-scope variable of
+         * the same name; the outer one is first assigned after this block */
+        UWORD32 cntr, u4_dc_cntrl;
+
+        /* Do inv hadamard in place on the first row of the source buffer */
+        ps_enc_loop_fxns->pf_ihadamard_scaling_4x4(ps_src->pv_data, ps_src->pv_data, pu2_iscale_mat,
+                                                   pu2_weigh_mat, u4_qp_div_6, pi4_tmp);
+
+        /* Update the cntrl flag: bit (15 - n) is set when DC coeff n is nonzero */
+        u4_dc_cntrl = 0;
+
+        for(cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++)
+        {
+            u4_dc_cntrl |= ((((WORD16 *) ps_src->pv_data)[cntr] != 0) << (15 - cntr));
+        }
+
+        /* Keep a dc bit only if the corresponding ac bit (upper half) is 0 */
+        u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
+
+        /* Combine both ac and dc bits */
+        u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA) | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA);
+    }
+
+    /* Source for dc coeffs
+     * With the DC transform (i4_iq_start_idx == 1) the dc values are read from
+     * the first row of src, one element per block.
+     * Otherwise they are read starting one stride into src, one stride per block.
+     */
+    pi2_dc_src = ((WORD16 *) ps_src->pv_data) + (i4_iq_start_idx == 0) * ps_src->i4_data_stride;
+    u4_dc_inc = (i4_iq_start_idx == 0) ? ps_src->i4_data_stride : 1;
+
+    /* Get the block bits; dc bits are moved to the upper half so that all three
+     * lists use the same bit positions */
+    u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA);
+    u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16;
+    u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000;
+
+    /* Get first block to process.
+     * NOTE(review): DEQUEUE_BLKID_FROM_CONTROL is defined elsewhere; it
+     * presumably removes the next set bit from the control word and yields a
+     * block id >= NUM_LUMA4x4_BLOCKS_IN_MB once the word is empty - confirm
+     * against the macro definition */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+
+    /* Pass 1: blocks flagged as DC-only */
+    while(u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
+    {
+        /* Compute address of src blocks */
+        WORD32 i4_src_offset = u4_dc_inc * u4_blk_id;
+
+        /* Tx blk coeffs are stored blk by blk */
+        /* Hence, in order to access rows of each Tx blk, one needs to stride of
+         * TxxSize */
+        s_src.i4_data_stride = 4;
+        s_src.pv_data = pi2_dc_src + i4_src_offset;
+
+        IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        /* Compute address of out and pred blocks */
+        s_pred.pv_data =
+            ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
+        s_recon.pv_data =
+            ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
+        s_res.pv_data =
+            ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
+        s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
+                             i4_offset_y * ps_res_pred->i4_data_stride;
+
+        ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[u1_iq_it_recon_fxn_idx](
+            &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants, NULL,
+            pi2_dc_src + i4_src_offset, i4_iq_start_idx, u1_res_accumulate);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+    }
+
+    /* Pass 2: now process ac/mixed blocks */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+    while(u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
+    {
+        IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        /* Tx blk coeffs are stored blk by blk */
+        /* Hence, in order to access rows of each Tx blk, one needs to stride of
+         * TxxSize */
+        s_src.i4_data_stride = 4;
+        /* The AC blocks starts from 2nd row */
+        s_src.pv_data = ((WORD16 *) ps_src->pv_data) + (u4_blk_id + 1) * ps_src->i4_data_stride;
+
+        s_pred.pv_data =
+            ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
+        s_recon.pv_data =
+            ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
+        s_res.pv_data =
+            ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
+        s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
+                             i4_offset_y * ps_res_pred->i4_data_stride;
+
+        /* NOTE(review): the DC pointer advances by u4_blk_id elements here, not
+         * by u4_dc_inc * u4_blk_id as in pass 1 - verify this is intended when
+         * u4_dc_trans_flag is 0 */
+        ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[u1_iq_it_recon_fxn_idx](
+            &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants,
+            (WORD16 *) pi4_tmp, pi2_dc_src + u4_blk_id, i4_iq_start_idx, u1_res_accumulate);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+    }
+
+    /* Pass 3: now process empty blocks (neither dc nor ac bit set) */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+    while(u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
+    {
+        IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        /* Tx blk coeffs are stored blk by blk */
+        /* Hence, in order to access rows of each Tx blk, one needs to stride of
+         * TxxSize */
+        s_src.i4_data_stride = 4;
+        /* The AC blocks starts from 2nd row */
+        s_src.pv_data = ((WORD16 *) ps_src->pv_data) + (u4_blk_id + 1) * ps_src->i4_data_stride;
+
+        s_pred.pv_data =
+            ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
+        s_recon.pv_data =
+            ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
+        s_res.pv_data =
+            ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
+        s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
+                             i4_offset_y * ps_res_pred->i4_data_stride;
+
+        ps_enc_loop_fxns->pf_zcbf_iquant_itrans_recon_4x4(
+            &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants, NULL,
+            pi2_dc_src + u4_blk_id, i4_iq_start_idx, u1_res_accumulate);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+    }
+}
+
+/**
+******************************************************************************* +* +* @brief +* This function performs does the DCT transform then Hadamard transform +* and quantization for a chroma macroblock +* +* @par Description: +* First cf4 is done on all 16 4x4 blocks of the 8x8input block +* Then hadamard transform is done on the DC coefficients +* Quantization is then performed on the 8x8 block, 4x4 wise +* +* @param[in] pu1_src +* Pointer to source sub-block +* The input is in interleaved format for two chroma planes +* +* @param[in] pu1_pred +* Pointer to prediction sub-block +* Prediction is in inter leaved format +* +* @param[in] pi2_out +* Pointer to residual sub-block +* The output will be in linear format +* The first 4 continuous locations will contain the values of DC block for U +* and then next 4 will contain for V. +* After DC block and a stride 1st AC block of U plane will follow +* After one more stride next AC block of V plane will follow +* The blocks will be in raster scan order +* +* After all the AC blocks of U plane AC blocks of V plane will follow in exact +* same way +* +* @param[in] i4_src_stride +* Source stride +* +* @param[in] i4_pred_stride +* Prediction stride +* +* @param[in] dst_strd +* Destination stride +* +* @param[in] pu2_scale_matrix +* The quantization matrix for 4x4 transform +* +* @param[in] pu2_threshold_matrix +* Threshold matrix +* +* @param[in] u4_qbits +* 15+QP/6 +* +* @param[in] u4_round_factor +* Round factor for quant +* +* @param[out] pu1_nnz +* Memory to store the non-zeros after transform +* The first byte will be the nnz od DC block for U plane +* From the next byte the AC nnzs will be storerd in raster scan order +* The fifth byte will be nnz of Dc block of V plane +* Then Ac blocks will follow +* +* @param u4_dc_flag +* Signals if Dc transform is to be done or not +* 1 -> Dc transform will be done +* 0 -> Dc transform will not be done +* +* @remarks +* 
+******************************************************************************* +*/ +void isvce_chroma_8x8_resi_trans_dctrans_quant( + buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_quant_coeffs, + buffer_container_t *ps_upsampled_res, isa_dependent_fxns_t *ps_isa_dependent_fxns, + const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, UWORD8 *pu1_nnz, + UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 u1_use_upsampled_res) +{ + WORD32 blk_cntr; + WORD32 i4_offsetx, i4_offsety; + UWORD8 au1_dcnnz[2]; + + enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns; + buffer_container_t s_src = ps_src[0]; + buffer_container_t s_pred = ps_pred[0]; + buffer_container_t s_quant_coeffs = ps_quant_coeffs[0]; + buffer_container_t s_upsampled_res = {0}; + resi_trans_quant_constants_t s_resi_trans_quant_constants = { + .pu2_scale_matrix = pu2_scale_matrix, + .pu2_threshold_matrix = pu2_threshold_matrix, + .u4_qbits = u4_qbits, + .u4_round_factor = u4_round_factor}; + + UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(u1_use_upsampled_res); + + if(u1_use_upsampled_res) + { + s_upsampled_res = ps_upsampled_res[0]; + } + + /* Move to the ac addresses */ + pu1_nnz++; + + for(blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++) + { + IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety); + + s_src.pv_data = + ((UWORD8 *) ps_src[0].pv_data) + i4_offsetx + i4_offsety * ps_src[0].i4_data_stride; + s_pred.pv_data = + ((UWORD8 *) ps_pred[0].pv_data) + i4_offsetx + i4_offsety * ps_pred[0].i4_data_stride; + s_quant_coeffs.pv_data = + ((WORD16 *) ps_quant_coeffs[0].pv_data) + blk_cntr * ps_quant_coeffs[0].i4_data_stride; + + if(u1_use_upsampled_res) + { + s_upsampled_res.pv_data = ((WORD16 *) ps_upsampled_res[0].pv_data) + i4_offsetx + + i4_offsety * ps_upsampled_res[0].i4_data_stride; + } + + /* Move to the ac addresses */ + s_quant_coeffs.pv_data = + ((WORD16 *) s_quant_coeffs.pv_data) + 
ps_quant_coeffs[0].i4_data_stride; + + s_quant_coeffs.i4_data_stride = 4; + + /* For chroma, v plane nnz is populated from position 5 */ + ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[u1_resi_trans_fxn_idx]( + &s_src, &s_pred, &s_quant_coeffs, &s_upsampled_res, &s_resi_trans_quant_constants, + &pu1_nnz[blk_cntr + (blk_cntr > 3)], ((WORD16 *) ps_quant_coeffs->pv_data) + blk_cntr, + u1_use_upsampled_res); + } + + /* Adjust pointers to point to dc values */ + s_quant_coeffs = ps_quant_coeffs[0]; + pu1_nnz--; + + s_resi_trans_quant_constants.u4_qbits++; + s_resi_trans_quant_constants.u4_round_factor <<= 1; + + ps_enc_loop_fxns->pf_hadamard_quant_2x2_uv(((WORD16 *) ps_quant_coeffs->pv_data), + ((WORD16 *) ps_quant_coeffs->pv_data), + &s_resi_trans_quant_constants, au1_dcnnz); + + /* Copy the dc nnzs */ + pu1_nnz[0] = au1_dcnnz[0]; + pu1_nnz[5] = au1_dcnnz[1]; +} + +/** +******************************************************************************* +* @brief +* This function performs the inverse transform with process for chroma MB of +*H264 +* +* @par Description: +* Does inverse DC transform ,inverse quantization inverse transform +* +* @param[in] pi2_src +* Input data, 16x16 size +* The input is in the form of, first 4 locations will contain DC coeffs of +* U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane +* in raster scan order will follow, each block as linear array in raster scan +*order. After a stride next AC block will follow. After all AC blocks of U plane +* V plane AC blocks will follow in exact same order. 
+*
+* @param[in] pu1_pred
+* The predicted data, 8x16 size, U and V interleaved
+*
+* @param[in] pu1_out
+* Output 8x16, U and V interleaved
+*
+* @param[in] i4_src_stride
+* Source stride
+*
+* @param[in] i4_pred_stride
+* input stride for prediction buffer
+*
+* @param[in] i4_out_stride
+* input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+* Inverse quantization martix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+* weight matrix of 4x4 transform
+*
+* @param[in] u4_qp_div_6
+* QP/6
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma *
+*number of planes in size
+*
+* @param[in] pu4_cntrl
+* Controls the transform path
+* the 15 th bit will correspond to DC block of U plane , 14th will indicate the
+*V plane Dc block 32-28 bits will indicate AC blocks of U plane in raster scan
+*order 27-23 bits will indicate AC blocks of V plane in rater scan order The bit
+*1 implies that there is at least one non zero coeff in a block
+*
+* @returns
+* none
+*
+* @remarks
+*******************************************************************************
+*/
+void isvce_chroma_8x8_idctrans_iquant_itrans_recon(
+    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_recon,
+    buffer_container_t *ps_res, buffer_container_t *ps_res_pred,
+    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+    isa_dependent_fxns_t *ps_isa_dependent_fxns, WORD32 *pi4_tmp, UWORD32 u4_cntrl,
+    UWORD8 u1_res_accumulate)
+{
+    /*
+     * Overview: reconstructs the 8 chroma 4x4 blocks of a macroblock.
+     * u4_cntrl is split into three work lists (DC-only, AC/mixed, empty) and
+     * each list is drained with DEQUEUE_BLKID_FROM_CONTROL, calling the
+     * matching kernel from ps_isa_dependent_fxns->s_enc_loop_fxns.
+     * The pixel/residual containers are addressed per block through local
+     * copies.
+     */
+
+    /* Cntrl bits for 4x4 transforms
+     * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
+     * u4_dc_cntrl : controls if a 4x4 block is to be processed in dc path
+     *             : dc block must contain only single dc coefficient
+     * u4_empty_blk_cntrl : control for 4x4 block with no coeffs, ie no dc and ac
+     *                    : ie not (ac or dc)
+     */
+    UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
+    /* NOTE(review): declared signed despite the u4_ prefix; the luma variant of
+     * this routine uses UWORD32 for the same variable */
+    WORD32 u4_blk_id;
+    WORD32 i4_offset_x, i4_offset_y;
+    WORD16 *pi2_dc_src;
+    /* Increment for dc block */
+    WORD32 i4_dc_inc;
+
+    enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
+    /* Per-block working copies; only pv_data / i4_data_stride are rewritten below */
+    buffer_container_t s_src = ps_src[0];
+    buffer_container_t s_pred = ps_pred[0];
+    buffer_container_t s_recon = ps_recon[0];
+    buffer_container_t s_res = ps_res[0];
+    buffer_container_t s_res_pred = ps_res_pred[0];
+
+    /* Stand-in DC value used when no DC block is signalled in u4_cntrl */
+    WORD16 i2_zero = 0;
+    const UWORD16 *pu2_iscale_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+    UWORD8 u1_iq_it_recon_fxn_idx = isvc_get_iq_it_recon_variant_idx(0, u1_res_accumulate);
+
+    /*
+     * Lets do the inverse transform for dc coeffs in chroma
+     */
+    if(u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA)
+    {
+        /* NOTE(review): this u4_dc_cntrl shadows the function-scope variable of
+         * the same name; the outer one is first assigned after this if/else */
+        UWORD32 cntr, u4_dc_cntrl;
+        /* Do inv hadamard for u and v block, in place at the start of src */
+
+        ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv(s_src.pv_data, s_src.pv_data, pu2_iscale_mat,
+                                                      pu2_weigh_mat, u4_qp_div_6, NULL);
+        /*
+         * Update the cntrl flag
+         * Bit (15 - n) is set when the n-th of the first 8 source values is
+         * nonzero
+         */
+        u4_dc_cntrl = 0;
+        for(cntr = 0; cntr < 8; cntr++)
+        {
+            u4_dc_cntrl |= ((((WORD16 *) ps_src->pv_data)[cntr] != 0) << (15 - cntr));
+        }
+
+        /* Keep a dc bit only if the corresponding ac bit (upper half) is 0 */
+        u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
+        /* Combine both ac and dc bits */
+        u4_cntrl =
+            (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA) | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA);
+
+        /* Since we populated the dc coeffs, we have to read them from there */
+        pi2_dc_src = ((WORD16 *) ps_src->pv_data);
+        i4_dc_inc = 1;
+    }
+    else
+    {
+        /* No DC block: every block reads the same zero DC value */
+        u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA;
+        pi2_dc_src = &i2_zero;
+        i4_dc_inc = 0;
+    }
+
+    /* Get the block bits; dc bits are moved to the upper half so that all three
+     * lists use the same bit positions */
+    u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA);
+    u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16;
+    u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000;
+
+    /* NOTE(review): DEQUEUE_BLKID_FROM_CONTROL is defined elsewhere; it
+     * presumably removes the next set bit from the control word and yields a
+     * block id >= 8 once the word is empty - confirm against the macro */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+
+    /* Pass 1: blocks flagged as DC-only */
+    while(u4_blk_id < 8)
+    {
+        WORD32 dc_src_offset = u4_blk_id * i4_dc_inc;
+
+        /* Tx blk coeffs are stored blk by blk */
+        /* Hence, in order to access rows of each Tx blk, one needs to stride of
+         * TxxSize */
+        s_src.i4_data_stride = 4;
+        s_src.pv_data = pi2_dc_src + dc_src_offset;
+
+        IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        s_pred.pv_data =
+            ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
+        s_recon.pv_data =
+            ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
+        s_res.pv_data =
+            ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
+        s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
+                             i4_offset_y * ps_res_pred->i4_data_stride;
+
+        ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[u1_iq_it_recon_fxn_idx](
+            &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants, NULL,
+            s_src.pv_data, 0, u1_res_accumulate);
+
+        /* Get next DC block to process */
+        DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+    }
+
+    /* Pass 2: now process ac/mixed blocks */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+    while(u4_blk_id < 8)
+    {
+        WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
+
+        /* Tx blk coeffs are stored blk by blk */
+        /* Hence, in order to access rows of each Tx blk, one needs to stride of
+         * TxxSize */
+        s_src.i4_data_stride = 4;
+        /* The AC blocks starts from 2nd row */
+        s_src.pv_data = ((WORD16 *) ps_src->pv_data) + (u4_blk_id + 1) * ps_src->i4_data_stride;
+
+        IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        s_pred.pv_data =
+            ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
+        s_recon.pv_data =
+            ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
+        s_res.pv_data =
+            ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
+        s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
+                             i4_offset_y * ps_res_pred->i4_data_stride;
+
+        ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[u1_iq_it_recon_fxn_idx](
+            &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants,
+            (WORD16 *) pi4_tmp, pi2_dc_src + dc_src_offset, 0, u1_res_accumulate);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+    }
+
+    /* Pass 3: now process empty blocks (neither dc nor ac bit set) */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+
+    while(u4_blk_id < 8)
+    {
+        WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
+
+        IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        /* Tx blk coeffs are stored blk by blk */
+        /* Hence, in order to access rows of each Tx blk, one needs to stride of
+         * TxxSize */
+        s_src.i4_data_stride = 4;
+        /* The AC blocks starts from 2nd row */
+        s_src.pv_data = ((WORD16 *) ps_src->pv_data) + (u4_blk_id + 1) * ps_src->i4_data_stride;
+
+        s_pred.pv_data =
+            ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
+        s_recon.pv_data =
+            ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
+        s_res.pv_data =
+            ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
+        s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
+                             i4_offset_y * ps_res_pred->i4_data_stride;
+
+        ps_enc_loop_fxns->pf_chroma_zcbf_iquant_itrans_recon_4x4(
+            &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants,
+            (WORD16 *) pi4_tmp, pi2_dc_src + dc_src_offset, 0, u1_res_accumulate);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of an i16x16 luma mb for entropy coding
+*
+* @par Description
+* An i16 macro block contains two classes of units, dc 4x4 block and
+* 4x4 ac blocks.
while packing the mb, the dc block is sent first, and +* the 16 ac blocks are sent next in scan order. Each and every block is +* represented by 3 parameters (nnz, significant coefficient map and the +* residue coefficients itself). If a 4x4 unit does not have any coefficients +* then only nnz is sent. Inside a 4x4 block the individual coefficients are +* sent in scan order. +* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent. This is followed by nonzero coefficients. +* This is repeated for 1 dc + 16 ac blocks. +* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] u4_res_strd +* residual block stride +* +* @param[out] u1_cbp_l +* coded block pattern luma +* +* @param[in] pu1_nnz +* number of non zero coefficients in each 4x4 unit +* +* @param[out] +* Control signal for inverse transform of 16x16 blocks +* +* @return none +* +* @ remarks +* +****************************************************************************** +*/ +void isvce_pack_l_mb_i16(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, WORD32 i4_res_strd, + UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz, UWORD32 *pu4_cntrl) +{ + /* pointer to packed sub block buffer space */ + tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order; + + /* number of non zeros in sub block */ + UWORD32 u4_nnz; + + /* coeff scan order */ + const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; + + /* temp var */ + UWORD32 coeff_cnt, mask, b4, u4_cntrl = 0; + + /*DC and AC coeff pointers*/ + WORD16 *pi2_res_mb_ac, *pi2_res_mb_dc; + + /********************************************************/ + /* pack dc 
coeff data for entropy coding */ + /********************************************************/ + + pi2_res_mb_dc = pi2_res_mb; + pu1_scan_order = gu1_luma_scan_order_dc; + + u4_nnz = *pu1_nnz; + u4_cntrl = 0; + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if(u4_nnz) + { + for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + if(pi2_res_mb_dc[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = + pi2_res_mb_dc[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); + + u4_cntrl = 0x00008000; // Set DC bit in ctrl code + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + /********************************************************/ + /* pack ac coeff data for entropy coding */ + /********************************************************/ + + pu1_nnz++; + pu1_scan_order = gu1_luma_scan_order; + pi2_res_mb += i4_res_strd; /*Move to AC block*/ + + ps_mb_coeff_data_ac = (*pv_mb_coeff_data); + + for(b4 = 0; b4 < 16; b4++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + u4_nnz = pu1_nnz[u1_scan_order[b4]]; + + /* Jump according to the scan order */ + pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]); + + /* + * Since this is a i16x16 block, we should not count dc coeff on indi + * vidual 4x4 blocks to nnz. But due to the implementation of 16x16 + * trans function, we add dc's nnz to u4_nnz too. 
Hence we adjust that + * here + */ + u4_nnz -= (pi2_res_mb_ac[0] != 0); + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if(u4_nnz) + { + for(u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; + coeff_cnt++) + { + if(pi2_res_mb_ac[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = + pi2_res_mb_ac[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); + *u1_cbp_l = 15; + + u4_cntrl |= (1 << (31 - u1_scan_order[b4])); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + } + + if(!(*u1_cbp_l)) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_ac; + } + + /* Store the cntrl signal */ + (*pu4_cntrl) = u4_cntrl; + return; +} + +/** +****************************************************************************** +* +* @brief This function packs residue of an p16x16 luma mb for entropy coding +* +* @par Description +* A p16x16 macro block contains two classes of units 16 4x4 ac blocks. +* while packing the mb, the dc block is sent first, and +* the 16 ac blocks are sent next in scan order. Each and every block is +* represented by 3 parameters (nnz, significant coefficient map and the +* residue coefficients itself). If a 4x4 unit does not have any coefficients +* then only nnz is sent. Inside a 4x4 block the individual coefficients are +* sent in scan order. +* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent. This is followed by nonzero coefficients. +* This is repeated for 1 dc + 16 ac blocks. 
+* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] i4_res_strd +* residual block stride +* +* @param[out] u1_cbp_l +* coded block pattern luma +* +* @param[in] pu1_nnz +* number of non zero coefficients in each 4x4 unit +* +* @param[out] pu4_cntrl +* Control signal for inverse transform +* +* @return none +* +* @remarks Killing coffs not yet coded +* +****************************************************************************** +*/ +void isvce_pack_l_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, WORD32 i4_res_strd, + UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz, UWORD32 u4_thres_resi, UWORD32 *pu4_cntrl) +{ + /* pointer to packed sub block buffer space */ + tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order = gu1_luma_scan_order; + + /* number of non zeros in sub block */ + UWORD32 u4_nnz; + + /* pointer to residual sub block */ + WORD16 *pi2_res_sb; + + /* coeff scan order */ + const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; + + /* coeff cost */ + const UWORD8 *pu1_coeff_cost = gu1_coeff_cost; + + /* temp var */ + UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8; + + /* temp var */ + WORD32 i4_res_val, i4_run = -1, dcac_block; + + /* When Hadamard transform is disabled, first row values are dont care, ignore + * them */ + pi2_res_mb += i4_res_strd; + + /* When Hadamard transform is disabled, first unit value is dont care, ignore + * this */ + pu1_nnz++; + + ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data); + + /********************************************************/ + /* pack coeff data for entropy coding */ + 
/********************************************************/ + + for(b4 = 0; b4 < 16; b4++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + b8 = b4 >> 2; + + u4_nnz = pu1_nnz[u1_scan_order[b4]]; + + /* Jump according to the scan order */ + pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]); + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if(u4_nnz) + { + for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; + coeff_cnt++) + { + /* number of runs of zero before, this is used to compute coeff cost */ + i4_run++; + + i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; + + if(i4_res_val) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val; + u4_s_map |= mask; + + if(u4_thres_resi) + { + /* compute coeff cost */ + if(i4_res_val == 1 || i4_res_val == -1) + { + if(i4_run < 6) u4_b8_coeff_cost += pu1_coeff_cost[i4_run]; + } + else + u4_b8_coeff_cost += 9; + + i4_run = -1; + } + } + + mask <<= 1; + } + + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); + + /* cbp */ + *u1_cbp_l |= (1 << b8); + + /* Cntrl map for inverse transform computation + * + * If coeff_cnt is zero, it means that only nonzero was a dc coeff + * Hence we have to set the 16 - u1_scan_order[b4]) position instead + * of 31 - u1_scan_order[b4] + */ + dcac_block = (coeff_cnt == 0) ? 16 : 31; + u4_cntrl |= (1 << (dcac_block - u1_scan_order[b4])); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + /* Decide if the 8x8 unit has to be sent for entropy coding? 
*/ + if((b4 + 1) % 4 == 0) + { + if(u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) && + (*u1_cbp_l & (1 << b8))) + { + /* + * When we want to reset the full 8x8 block, we have to reset + * both the dc and ac coeff bits hence we have the symmetric + * arrangement of bits + */ + const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033}; + + /* restore cbp */ + *u1_cbp_l = (*u1_cbp_l & (~(1 << b8))); + + /* correct cntrl flag */ + u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]); + + /* correct nnz */ + pu1_nnz[u1_scan_order[b4 - 3]] = 0; + pu1_nnz[u1_scan_order[b4 - 2]] = 0; + pu1_nnz[u1_scan_order[b4 - 1]] = 0; + pu1_nnz[u1_scan_order[b4]] = 0; + + /* reset blk cost */ + u4_b8_coeff_cost = 0; + } + + if(!(*u1_cbp_l & (1 << b8))) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_b8; + } + + u4_mb_coeff_cost += u4_b8_coeff_cost; + + u4_b8_coeff_cost = 0; + i4_run = -1; + ps_mb_coeff_data_b8 = (*pv_mb_coeff_data); + } + } + + if(u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD) && (*u1_cbp_l)) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_mb; + *u1_cbp_l = 0; + u4_cntrl = 0; + memset(pu1_nnz, 0, 16); + } + + (*pu4_cntrl) = u4_cntrl; + + return; +} + +/** +****************************************************************************** +* +* @brief This function packs residue of an i8x8 chroma mb for entropy coding +* +* @par Description +* An i8 chroma macro block contains two classes of units, dc 2x2 block and +* 4x4 ac blocks. While packing the mb, the dc blocks of both planes are sent +* first, and the 4 ac blocks of each plane are sent next in scan order. Each +* and every block is represented by 3 parameters (nnz, significant coefficient +* map and the residue coefficients itself). If a 4x4 unit does not have any +* coefficients then only nnz is sent. Inside a 4x4 block the individual +* coefficients are sent in scan order. +* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent.
This is followed by nonzero coefficients. +* This is repeated for 1 dc + 4 ac blocks. +* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] i4_res_strd +* residual block stride +* +* @param[in, out] u1_cbp_c +* coded block pattern chroma; set to 1 when dc coefficients are packed and +* to 2 when ac coefficients are kept, otherwise left at the incoming value +* +* @param[in, out] pu1_nnz +* number of non zero coefficients in each 4x4 unit; the ac entries of a +* plane are cleared when that plane is reset +* +* @param[in] u4_thres_resi +* when non zero, the ac coefficients of a plane whose coefficient cost is +* below CHROMA_BLOCK_SKIP_THRESHOLD are dropped +* +* @param[out] pu4_cntrl +* Control signal for inverse transform +* +* @param[in] u4_swap_uv +* Swaps the order of U and V planes in entropy bitstream +* +* @return none +* +* @remarks +* +****************************************************************************** +*/ +void isvce_pack_c_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, WORD32 i4_res_strd, + UWORD8 *u1_cbp_c, UWORD8 *pu1_nnz, UWORD32 u4_thres_resi, UWORD32 *pu4_cntrl, + UWORD32 u4_swap_uv) +{ + /* pointer to packed sub block buffer space */ + tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data); + tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac; + + /* nnz pointer */ + UWORD8 *pu1_nnz_ac, *pu1_nnz_dc; + + /* nnz counter */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz; + + /* pointer to residual sub block, res val */ + WORD16 *pi2_res_sb, i2_res_val; + + /* temp var */ + UWORD32 coeff_cnt, mask, b4, plane; + + /* temp var */ + UWORD32 u4_coeff_cost; + WORD32 i4_run; + + /* coeff cost */ + const UWORD8 *pu1_coeff_cost = gu1_coeff_cost; + + /* pointer to packed buffer space */ + UWORD32 *pu4_mb_coeff_data = NULL; + + /* ac coded block pattern */ + UWORD8 u1_cbp_ac; + + /* Variable to store the current bit pos in cntrl variable */ + UWORD32 cntrl_pos = 0; + + /********************************************************/ + /* pack dc coeff data for entropy coding */ + 
/********************************************************/ + pu1_scan_order = gu1_chroma_scan_order_dc; + pi2_res_sb = pi2_res_mb; + pu1_nnz_dc = pu1_nnz; + (*pu4_cntrl) = 0; + cntrl_pos = 15; + ps_mb_coeff_data_dc = (*pv_mb_coeff_data); + + /* Color space conversion between SP_UV and SP_VU + * We always assume SP_UV for all the processing + * Hence to get proper stream output we need to swap U and V channels here + * + * For that there are two paths we need to look for + * One is the path to bitstream , these variables should have the proper input + * configured UV or VU + * For the other path the inverse transform variables should have whatever + * ordering the input had + */ + + if(u4_swap_uv) + { + pu1_nnz_dc += 5; /* Move to NNZ of V plane */ + pi2_res_sb += 4; /* Move to DC coeff of V plane */ + + cntrl_pos = 14; /* Control bit for V plane */ + } + + for(plane = 0; plane < 2; plane++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + u4_nnz = *pu1_nnz_dc; + /* write number of non zero coefficients U/V */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if(u4_nnz) + { + for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; + coeff_cnt++) + { + i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; + if(i2_res_val) + { + /* write residue U/V */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map U/V */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); + *u1_cbp_c = 1; + + (*pu4_cntrl) |= (1 << cntrl_pos); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + if(u4_swap_uv) + { + cntrl_pos++; /* Control bit for U plane */ + pu1_nnz_dc -= 5; /* Move to NNZ of U plane */ + pi2_res_sb -= 4; /* Move to DC coeff of U plane */ + } + else + { + cntrl_pos--; /* Control bit for V plane */ + pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */ + pi2_res_sb += 4; /* 
Move to DC coeff of V plane */ + } + } + + /********************************************************/ + /* pack ac coeff data for entropy coding */ + /********************************************************/ + + pu1_scan_order = gu1_chroma_scan_order; + ps_mb_coeff_data_ac = (*pv_mb_coeff_data); + + if(u4_swap_uv) + { + pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane, i.e. 1 dc row + 4 ac rows */ + cntrl_pos = 27; /* The control bits of the V block start at bit 31 - 4 */ + pu1_nnz_ac = pu1_nnz + 6; /* Move the nnz to V block NNZ: 1 dc + 4 ac + 1 dc */ + } + else + { + pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane, i.e. 1 dc row */ + cntrl_pos = 31; + pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to U block NNZ, i.e. skip 1 dc */ + } + + for(plane = 0; plane < 2; plane++) + { + pu4_mb_coeff_data = (*pv_mb_coeff_data); + + u4_coeff_cost = 0; + i4_run = -1; + + /* get the current cbp, so that it automatically + * gets reverted in case of zero ac values */ + u1_cbp_ac = *u1_cbp_c; + + for(b4 = 0; b4 < 4; b4++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + u4_nnz = *pu1_nnz_ac; + + /* + * We are scanning only ac coeffs, but the nnz is for the + * complete 4x4 block. 
Hence we have to discount the nnz contributed + * by the dc coefficient + */ + u4_nnz -= (pi2_res_sb[0] != 0); + + /* write number of non zero coefficients U/V */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if(u4_nnz) + { + for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; + coeff_cnt++) + { + i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; + + i4_run++; + + if(i2_res_val) + { + /* write residue U/V */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val; + u4_s_map |= mask; + + if(u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD)) + { + /* compute coeff cost */ + if(i2_res_val == 1 || i2_res_val == -1) + { + if(i4_run < 6) u4_coeff_cost += pu1_coeff_cost[i4_run]; + } + else + u4_coeff_cost += 9; + + i4_run = -1; + } + } + mask <<= 1; + } + + /* write significant coeff map U/V */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); + u1_cbp_ac = 2; + + (*pu4_cntrl) |= 1 << cntrl_pos; /* NOTE(review): cntrl_pos can be 31 here, which shifts a signed 1 into the sign bit; 1U would avoid that */ + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + pu1_nnz_ac++; + pi2_res_sb += i4_res_strd; + cntrl_pos--; + } + + /* reset block */ + if(u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD)) + { + pu4_mb_coeff_data[0] = 0; + pu4_mb_coeff_data[1] = 0; + pu4_mb_coeff_data[2] = 0; + pu4_mb_coeff_data[3] = 0; + (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4; + + /* Generate the control signal */ + /* Zero out the current plane's AC coefficients */ + (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 
0x0FFFFFFF : 0xF0FFFFFF); + + /* Similarly do for the NNZ also */ + *(pu1_nnz_ac - 4) = 0; + *(pu1_nnz_ac - 3) = 0; + *(pu1_nnz_ac - 2) = 0; + *(pu1_nnz_ac - 1) = 0; + } + else + { + *u1_cbp_c = u1_cbp_ac; + } + + if(u4_swap_uv) + { + pi2_res_sb = + pi2_res_mb + i4_res_strd; /* Move to U plane, i.e. skip the 1 dc row */ + cntrl_pos = 31; /* The control bits of the U block start at bit 31 */ + pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to U block NNZ, i.e. skip 1 dc */ + + pu1_nnz_ac = pu1_nnz + 1; /* NOTE(review): repeats the assignment above */ + } + else + pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */ + } + + /* restore the ptr basing on cbp */ + if(*u1_cbp_c == 0) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_dc; + } + else if(*u1_cbp_c == 1) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_ac; + } + + return; +} + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i16x16 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i16x16, the mb is first +* predicted using one of i16x16 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is transformed (hierarchical transform i.e., dct followed by hada- +* -mard), quantized. The quantized coefficients are packed in scan order for +* entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks none +* +******************************************************************************* +*/ +/* NOTE(review): the header above names the argument ps_proc_ctxt; the parameter is ps_proc */ +UWORD8 isvce_code_luma_intra_macroblock_16x16(isvce_process_ctxt_t *ps_proc) +{ + buffer_container_t s_src; + buffer_container_t s_pred; + buffer_container_t s_recon; + buffer_container_t s_res; + buffer_container_t s_quant_coeffs; + + /* Control signal for itrans */ + UWORD32 u4_cntrl; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns; + iq_it_res_rec_constants_t s_iq_it_res_rec_constants = { + .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat, + .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat, + .u4_qp_div_6 = ps_qp_params->u1_qp_div}; + + UWORD8 *pu1_pred_mb = NULL; + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; + WORD32 i4_pred_stride = ps_proc->i4_pred_strd; + WORD32 i4_res_strd = ps_proc->i4_res_strd; + UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode; + UWORD32 au4_nnz[5] = {0}; + UWORD8 u1_cbp_l = 0; + UWORD8 *pu1_nnz = (UWORD8 *) au4_nnz; + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + + if(u1_intra_mode == PLANE_I16x16) + { + pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane; + } + else + { + pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16; + } + + s_src = ps_proc->s_src_buf_props.as_component_bufs[Y]; + s_recon = ps_proc->s_rec_buf_props.as_component_bufs[Y]; + s_pred.pv_data = pu1_pred_mb; + s_pred.i4_data_stride = i4_pred_stride; + s_quant_coeffs.pv_data = pi2_res_mb; + s_quant_coeffs.i4_data_stride = i4_res_strd; + + s_res = ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id] + .as_component_bufs[Y]; + s_res.pv_data = ((WORD16 *) 
s_res.pv_data) + ps_proc->i4_mb_x * MB_SIZE + + ps_proc->i4_mb_y * MB_SIZE * s_res.i4_data_stride; + + /********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + isvce_luma_16x16_resi_trans_dctrans_quant( + &s_src, &s_pred, &s_quant_coeffs, &ps_proc->ps_mb_res_buf->as_component_bufs[Y], + ps_isa_dependent_fxns, ps_qp_params->pu2_scale_mat, ps_qp_params->pu2_thres_mat, pu1_nnz, + ps_qp_params->u1_qbits, ps_qp_params->u4_dead_zone, ENABLE_DC_TRANSFORM, + ps_proc->ps_mb_info->u1_residual_prediction_flag); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + isvce_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, pu1_nnz, &u4_cntrl); + + /********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + /* + * If the reference frame is not to be computed, + * we only need the right and bottom border 4x4 blocks to predict next intra + * blocks, hence only compute them + */ + if(!ps_proc->u4_compute_recon) + { + u4_cntrl &= 0x111F8000; + } + + if(u4_cntrl) + { + isvce_luma_16x16_idctrans_iquant_itrans_recon( + &s_quant_coeffs, &s_pred, &s_recon, &s_res, + &ps_proc->ps_mb_res_buf->as_component_bufs[Y], &s_iq_it_res_rec_constants, + ps_isa_dependent_fxns, ps_proc->pv_scratch_buff, u4_cntrl, ENABLE_DC_TRANSFORM, 0); + } + else /* no control bit left set: the recon is a plain copy of the prediction */ + { + ps_inter_pred_fxns->pf_inter_pred_luma_copy(pu1_pred_mb, (UWORD8 *) s_recon.pv_data, + i4_pred_stride, s_recon.i4_data_stride, MB_SIZE, + MB_SIZE, NULL, 0); + } + + return (u1_cbp_l); +} + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i4x4 +* +* @par Description: +* If the current 
mb is to be coded as intra of mb type i4x4, the mb is first +* predicted using one of i4x4 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is dct transformed and quantized. The quantized coefficients are +* packed in scan order for entropy coding. +* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks +* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan +*order mentioned in h.264 specification +* +******************************************************************************* +*/ +UWORD8 isvce_code_luma_intra_macroblock_4x4(isvce_process_ctxt_t *ps_proc) +{ /* NOTE(review): the header above names the argument ps_proc_ctxt; the parameter is ps_proc */ + buffer_container_t s_src; + buffer_container_t s_pred; + buffer_container_t s_recon; + buffer_container_t s_res; + buffer_container_t s_res_pred; + buffer_container_t s_quant_coeffs; + + /* pointers to neighbors: left (a), top (b), top-right (c), top-left (d) */ + UWORD8 *pu1_mb_a; + UWORD8 *pu1_mb_b; + UWORD8 *pu1_mb_c; + UWORD8 *pu1_mb_d; + WORD32 i4_ngbr_avbl; + UWORD8 u1_nnz; + UWORD32 u4_nnz_cnt; + /* significant coefficient map */ + UWORD32 u4_s_map; + /* Dummy variable for 4x4 trans function */ + WORD16 i2_dc_dummy; + UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + /* pointer to packed mb coeff data */ + tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns; + inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns; + resi_trans_quant_constants_t s_resi_trans_quant_constants = { + .pu2_scale_matrix = ps_qp_params->pu2_scale_mat, + .pu2_threshold_matrix = ps_qp_params->pu2_thres_mat, + .u4_qbits = 
ps_qp_params->u1_qbits, + .u4_round_factor = ps_qp_params->u4_dead_zone}; + iq_it_res_rec_constants_t s_iq_it_res_rec_constants = { + .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat, + .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat, + .u4_qp_div_6 = ps_qp_params->u1_qp_div}; + + UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; + WORD32 i4_pred_stride = ps_proc->i4_pred_strd; + UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode; /* initial value only; replaced by the per sub block mode inside the loop */ + UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; + UWORD8 u1_cbp_l = 0; + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + const UWORD8 *pu1_scan_order = gu1_luma_scan_order; + UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(0); + UWORD8 u1_iq_it_recon_fxn_idx = isvc_get_iq_it_recon_variant_idx(1, 0); + + s_src = ps_proc->s_src_buf_props.as_component_bufs[Y]; + s_recon = ps_proc->s_rec_buf_props.as_component_bufs[Y]; + s_pred.pv_data = pu1_pred_mb; + s_pred.i4_data_stride = i4_pred_stride; + s_quant_coeffs.pv_data = pi2_res_mb; + s_quant_coeffs.i4_data_stride = 4; + + /* Process 16 4x4 luma sub-blocks of the MB in scan order */ + for(b8 = 0; b8 < 4; b8++) + { + u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3; + u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3; + + /* remember the write position so that it can be restored when the cbp of + * this 8x8 block turns out to be zero, i.e. no residue is sent */ + ps_mb_coeff_data_b8 = *pv_mb_coeff_data; + + for(b4 = 0; b4 < 4; b4++) + { + /* index of pel in MB */ + u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2); + u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2); + + /* Initialize source and reference pointers */ + s_src = ps_proc->s_src_buf_props.as_component_bufs[Y]; + s_recon = ps_proc->s_rec_buf_props.as_component_bufs[Y]; + s_src.pv_data = ((UWORD8 *) s_src.pv_data) + u1_pix_x + u1_pix_y * s_src.i4_data_stride; + s_recon.pv_data = + ((UWORD8 *) s_recon.pv_data) + u1_pix_x + u1_pix_y * s_recon.i4_data_stride; + + s_res = 
ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id] + .as_component_bufs[Y]; + s_res.pv_data = ((WORD16 *) s_res.pv_data) + ps_proc->i4_mb_x * MB_SIZE + + ps_proc->i4_mb_y * MB_SIZE * s_res.i4_data_stride; + s_res.pv_data = ((WORD16 *) s_res.pv_data) + u1_pix_x + u1_pix_y * s_res.i4_data_stride; + + s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[Y]; + s_res_pred.pv_data = + ((WORD16 *) s_res_pred.pv_data) + u1_pix_x + u1_pix_y * s_res_pred.i4_data_stride; + + /* pointer to left of ref macro block */ + pu1_mb_a = ((UWORD8 *) s_recon.pv_data) - 1; + /* pointer to top of ref macro block */ + pu1_mb_b = ((UWORD8 *) s_recon.pv_data) - s_recon.i4_data_stride; + /* pointer to topright of ref macro block */ + pu1_mb_c = pu1_mb_b + 4; + /* pointer to topleft macro block */ + pu1_mb_d = pu1_mb_b - 1; + + /* look up the neighbor availability stored for this sub block */ + i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; + + /* sub block intra mode */ + u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4]; + + /********************************************************/ + /* gather prediction pels from neighbors for prediction */ + /********************************************************/ + /* left pels, written bottom-to-top into entries 0..3 */ + if(i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK) + { + for(i = 0; i < 4; i++) + pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * s_recon.i4_data_stride]; + } + else + { + memset(pu1_ngbr_pels_i4, 0, 4); + } + + /* top pels, entries 5..8 */ + if(i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK) + { + memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); + } + else + { + memset(pu1_ngbr_pels_i4 + 5, 0, 4); + } + /* top left pel, entry 4 */ + if(i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK) + { + pu1_ngbr_pels_i4[4] = *pu1_mb_d; + } + else + { + pu1_ngbr_pels_i4[4] = 0; + } + /* top right pels, entries 9..12; replicated from the last top pel when only top is available */ + if(i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK) + { + memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); + } + else if(i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK) + { + memset(pu1_ngbr_pels_i4 + 8 + 1, 
pu1_ngbr_pels_i4[8], 4); + } + + /********************************************************/ + /* prediction */ + /********************************************************/ + (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4, pu1_pred_mb, 0, + i4_pred_stride, i4_ngbr_avbl); + + /********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + ps_enc_loop_fxns->apf_resi_trans_quant_4x4[u1_resi_trans_fxn_idx]( + &s_src, &s_pred, &s_quant_coeffs, &s_res_pred, &s_resi_trans_quant_constants, + &u1_nnz, &i2_dc_dummy, 0); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + ps_mb_coeff_data = *pv_mb_coeff_data; + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz; + + if(u1_nnz) + { + for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; + coeff_cnt++) + { + if(pi2_res_mb[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = + pi2_res_mb[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + + /* update ptr to coeff data */ + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); + + /* cbp: mark the enclosing 8x8 block as coded */ + u1_cbp_l |= (1 << b8); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + /********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + if(u1_nnz) + { + buffer_container_t s_src = s_quant_coeffs; /* NOTE(review): shadows the outer s_src inside this scope */ + + /* Tx blk coeffs are stored blk by blk */ + /* Hence, in order to access rows of each Tx blk, one needs a stride of + * the Tx blk size */ + 
s_src.i4_data_stride = 4; + + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[u1_iq_it_recon_fxn_idx]( + &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, &s_iq_it_res_rec_constants, + (WORD16 *) ps_proc->pv_scratch_buff, s_src.pv_data, 0, 0); + } + else /* no coefficients: the recon of this 4x4 block is a copy of the prediction */ + { + ps_inter_pred_fxns->pf_inter_pred_luma_copy( + (UWORD8 *) s_pred.pv_data, (UWORD8 *) s_recon.pv_data, s_pred.i4_data_stride, + s_recon.i4_data_stride, BLK_SIZE, BLK_SIZE, NULL, 0); + } + } + + /* if the 8x8 block has no residue, nothing needs to be sent to entropy */ + if(!(u1_cbp_l & (1 << b8))) + { + *pv_mb_coeff_data = ps_mb_coeff_data_b8; + } + } + + return (u1_cbp_l); +} + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i4x4 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i4x4, the mb is first +* predicted using one of i4x4 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is dct transformed and quantized. The quantized coefficients are +* packed in scan order for entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks +* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan +*order mentioned in h.264 specification +* +******************************************************************************* +*/ +UWORD8 isvce_code_luma_intra_macroblock_4x4_rdopt_on(isvce_process_ctxt_t *ps_proc) +{ /* NOTE(review): the header above was written for the non rdopt variant. This function performs no prediction or transform: it packs the coefficients already held in pi2_res_buf_intra_4x4 (nnz taken from au4_nnz_intra_4x4) and copies pu1_ref_mb_intra_4x4 into the recon buffer. The parameter is ps_proc, not ps_proc_ctxt. */ + /* pointer to packed mb coeff data */ + tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8; + + UWORD32 u4_nnz_cnt; + /* significant coefficient map */ + UWORD32 u4_s_map; + UWORD32 b8, b4, coeff_cnt, mask; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns; + + UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4; + UWORD8 *pu1_rec_mb = ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data); + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; + WORD32 i4_rec_strd = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride; + UWORD8 *pu1_nnz = (UWORD8 *) ps_proc->au4_nnz_intra_4x4; + UWORD8 u1_cbp_l = 0; + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + const UWORD8 *pu1_scan_order = gu1_luma_scan_order; + + /* Process 16 4x4 luma sub-blocks of the MB in scan order */ + for(b8 = 0; b8 < 4; b8++) + { + /* remember the write position so that it can be restored when the cbp of + * this 8x8 block turns out to be zero, i.e. no residue is sent */ + ps_mb_coeff_data_b8 = *pv_mb_coeff_data; + + for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE) /* one nnz byte and MB_SIZE coefficients per 4x4 block */ + { + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + ps_mb_coeff_data = *pv_mb_coeff_data; + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz; + + if(*pu1_nnz) + { + for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 
1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; + coeff_cnt++) + { + if(pi2_res_mb[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = + pi2_res_mb[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + + /* update ptr to coeff data */ + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); + + /* cbp: mark the enclosing 8x8 block as coded */ + u1_cbp_l |= (1 << b8); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + } + + /* if the 8x8 block has no residue, nothing needs to be sent to entropy */ + if(!(u1_cbp_l & (1 << b8))) + { + *pv_mb_coeff_data = ps_mb_coeff_data_b8; + } + } + /* copy the MB held in pu1_ref_mb_intra_4x4 (stride MB_SIZE) into the recon buffer */ + ps_inter_pred_fxns->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, + i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0); + + return (u1_cbp_l); +} + +/** +******************************************************************************* +* +* @brief performs chroma core coding for intra macro blocks +* +* @par Description: +* If the current MB is to be intra coded with mb type chroma I8x8, the MB is +* first predicted using intra 8x8 prediction filters. The predicted data is +* compared with the input for error and the error is transformed. The DC +* coefficients of each transformed sub blocks are further transformed using +* Hadamard transform. The resulting coefficients are quantized, packed and sent +* for entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_c +* coded block pattern chroma +* +* @remarks +* The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order +* mentioned in h.264 specification +* +******************************************************************************* +*/ +UWORD8 isvce_code_chroma_intra_macroblock_8x8(isvce_process_ctxt_t *ps_proc) +{ /* NOTE(review): the header above names the argument ps_proc_ctxt; the parameter is ps_proc */ + buffer_container_t s_src; + buffer_container_t s_pred; + buffer_container_t s_recon; + buffer_container_t s_res; + buffer_container_t s_res_pred; + buffer_container_t s_quant_coeffs; + + /* Control signal for inverse transform */ + UWORD32 u4_cntrl; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1]; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + iq_it_res_rec_constants_t s_iq_it_res_rec_constants = { + .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat, + .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat, + .u4_qp_div_6 = ps_qp_params->u1_qp_div}; + + UWORD8 *pu1_pred_mb = NULL; + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; + WORD32 i4_pred_stride = ps_proc->i4_pred_strd; + WORD32 i4_res_strd = ps_proc->i4_res_strd; + UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode; + UWORD8 u1_cbp_c = 0; + UWORD8 au1_nnz[2 * (NUM_4x4_IN_8x8 + 1)] = {0}; /* per plane: 1 dc nnz followed by NUM_4x4_IN_8x8 ac nnz */ + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + /* See if we need to swap U and V planes for entropy */ + UWORD32 u4_swap_uv = (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU); + + if(PLANE_CH_I8x8 == u1_intra_mode) + { + pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane; + } + else + { + pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma; + } + + s_src = ps_proc->s_src_buf_props.as_component_bufs[UV]; + s_recon = ps_proc->s_rec_buf_props.as_component_bufs[UV]; + s_pred.pv_data = pu1_pred_mb; + s_pred.i4_data_stride = i4_pred_stride; + s_quant_coeffs.pv_data = pi2_res_mb; 
+ s_quant_coeffs.i4_data_stride = i4_res_strd; + + s_res = ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id] + .as_component_bufs[UV]; + s_res.pv_data = ((WORD16 *) s_res.pv_data) + ps_proc->i4_mb_x * MB_SIZE + + ps_proc->i4_mb_y * (MB_SIZE / 2) * s_res.i4_data_stride; + + s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[U]; /* NOTE(review): the inter chroma path indexes this buffer with UV; confirm that U and UV select the same entry */ + + /********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + isvce_chroma_8x8_resi_trans_dctrans_quant( + &s_src, &s_pred, &s_quant_coeffs, &s_res_pred, ps_isa_dependent_fxns, + ps_qp_params->pu2_scale_mat, ps_qp_params->pu2_thres_mat, au1_nnz, ps_qp_params->u1_qbits, + ps_qp_params->u4_dead_zone, 0); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + isvce_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c, au1_nnz, + ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv); + + /********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + isvce_chroma_8x8_idctrans_iquant_itrans_recon( + &s_quant_coeffs, &s_pred, &s_recon, &s_res, &s_res_pred, &s_iq_it_res_rec_constants, + ps_isa_dependent_fxns, ps_proc->pv_scratch_buff, u4_cntrl, 0); + + memcpy(ps_proc->au1_chroma_nnz, au1_nnz, sizeof(ps_proc->au1_chroma_nnz)); /* publish the nnz, as left by isvce_pack_c_mb, to the process context */ + + return (u1_cbp_c); +} + +/** +******************************************************************************* +* +* @brief performs luma core coding when mode is inter +* +* @par Description: +* If the current mb is to be coded as inter the mb is predicted based on the +* sub mb partitions and corresponding motion vectors generated by ME. Then, +* error is computed between the input blk and the estimated blk. 
This error is +* transformed, quantized. The quantized coefficients are packed in scan order +* for entropy coding +* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks none +* +******************************************************************************* +*/ +/* NOTE(review): the header above names the argument ps_proc_ctxt; the parameter is ps_proc */ +UWORD8 isvce_code_luma_inter_macroblock_16x16(isvce_process_ctxt_t *ps_proc) +{ + buffer_container_t s_src; + buffer_container_t s_pred; + buffer_container_t s_recon; + buffer_container_t s_res; + buffer_container_t s_res_pred; + buffer_container_t s_quant_coeffs; + + /* Control signal of itrans */ + UWORD32 u4_cntrl; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + iq_it_res_rec_constants_t s_iq_it_res_rec_constants = { + .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat, + .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat, + .u4_qp_div_6 = ps_qp_params->u1_qp_div}; + + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; /* the inter path also uses the pi2_res_buf_intra_4x4 buffer */ + WORD32 i4_res_strd = ps_proc->i4_res_strd; + UWORD8 u1_cbp_l = 0; + UWORD8 *pu1_nnz = (UWORD8 *) ps_proc->au4_nnz; + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + /* clear the five nnz words of the process context before coding */ + ps_proc->au4_nnz[0] = 0; + ps_proc->au4_nnz[1] = 0; + ps_proc->au4_nnz[2] = 0; + ps_proc->au4_nnz[3] = 0; + ps_proc->au4_nnz[4] = 0; + + /********************************************************/ + /* prediction */ + /********************************************************/ + isvce_motion_comp_luma(ps_proc, &s_pred); + + s_src = ps_proc->s_src_buf_props.as_component_bufs[0]; + s_recon = ps_proc->s_rec_buf_props.as_component_bufs[0]; + s_quant_coeffs.pv_data = pi2_res_mb; + s_quant_coeffs.i4_data_stride = i4_res_strd; + + s_res = ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id] + .as_component_bufs[Y]; + s_res.pv_data = ((WORD16 *) 
s_res.pv_data) + ps_proc->i4_mb_x * MB_SIZE + + ps_proc->i4_mb_y * MB_SIZE * s_res.i4_data_stride; + + s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[Y]; + + /********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + if(ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0) + { + isvce_luma_16x16_resi_trans_dctrans_quant( + &s_src, &s_pred, &s_quant_coeffs, &s_res_pred, ps_isa_dependent_fxns, + ps_qp_params->pu2_scale_mat, ps_qp_params->pu2_thres_mat, pu1_nnz, + ps_qp_params->u1_qbits, ps_qp_params->u4_dead_zone, DISABLE_DC_TRANSFORM, + ps_proc->ps_mb_info->u1_residual_prediction_flag); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + isvce_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, pu1_nnz, + ps_codec->u4_thres_resi, &u4_cntrl); + } + else /* u4_min_sad_reached is set and u4_min_sad is zero: transform and packing are skipped */ + { + u1_cbp_l = 0; + u4_cntrl = 0; + } + + /********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + + /* If the frame is not to be used for P frame reference or dumping recon + * we will use the recon only for predicting intra MBs + * This will need only right and bottom edge 4x4 blocks recon + * Hence we selectively enable them using control signal(including DC) + */ + if(ps_proc->u4_compute_recon != 1) + { + u4_cntrl &= 0x111F0000; + } + + isvce_luma_16x16_idctrans_iquant_itrans_recon( + &s_quant_coeffs, &s_pred, &s_recon, &s_res, &s_res_pred, &s_iq_it_res_rec_constants, + ps_isa_dependent_fxns, ps_proc->pv_scratch_buff, u4_cntrl, DISABLE_DC_TRANSFORM, + ps_proc->ps_mb_info->u1_residual_prediction_flag); + + return (u1_cbp_l); +} + +/** 
+******************************************************************************* +* +* @brief performs chroma core coding for inter macro blocks +* +* @par Description: +* If the current mb is to be coded as inter predicted mb,based on the sub mb +*partitions and corresponding motion vectors generated by ME ,prediction is +*done. Then, error is computed between the input blk and the estimated blk. This +*error is transformed , quantized. The quantized coefficients are packed in scan +*order for entropy coding. +* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern chroma +* +* @remarks none +* +******************************************************************************* +*/ +UWORD8 isvce_code_chroma_inter_macroblock_8x8(isvce_process_ctxt_t *ps_proc) +{ /* NOTE(review): the header above says ps_proc_ctxt and u1_cbp_l; the parameter is ps_proc and the value returned is u1_cbp_c */ + buffer_container_t s_src; + buffer_container_t s_pred; + buffer_container_t s_recon; + buffer_container_t s_res; + buffer_container_t s_res_pred; + buffer_container_t s_quant_coeffs; + + /* Control signal for inverse transform */ + UWORD32 u4_cntrl; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1]; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + iq_it_res_rec_constants_t s_iq_it_res_rec_constants = { + .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat, + .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat, + .u4_qp_div_6 = ps_qp_params->u1_qp_div}; + + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; + WORD32 i4_res_strd = ps_proc->i4_res_strd; + UWORD8 u1_cbp_c = 0; + UWORD8 au1_nnz[2 * (NUM_4x4_IN_8x8 + 1)] = {0}; /* per plane: 1 dc nnz followed by NUM_4x4_IN_8x8 ac nnz */ + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + /* See if we need to swap U and V planes for entropy */ + UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU; + + isvce_motion_comp_chroma(ps_proc, &s_pred); + + s_src = ps_proc->s_src_buf_props.as_component_bufs[UV]; + s_recon = 
ps_proc->s_rec_buf_props.as_component_bufs[UV]; + s_quant_coeffs.pv_data = pi2_res_mb; + s_quant_coeffs.i4_data_stride = i4_res_strd; + + s_res = ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id] + .as_component_bufs[UV]; + s_res.pv_data = ((WORD16 *) s_res.pv_data) + ps_proc->i4_mb_x * MB_SIZE + + ps_proc->i4_mb_y * (MB_SIZE / 2) * s_res.i4_data_stride; + + s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[UV]; + + /********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + isvce_chroma_8x8_resi_trans_dctrans_quant( + &s_src, &s_pred, &s_quant_coeffs, &s_res_pred, ps_isa_dependent_fxns, + ps_qp_params->pu2_scale_mat, ps_qp_params->pu2_thres_mat, au1_nnz, ps_qp_params->u1_qbits, + ps_qp_params->u4_dead_zone, ps_proc->ps_mb_info->u1_residual_prediction_flag); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + isvce_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c, au1_nnz, + ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv); + + /********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + + /* If the frame is not to be used for P frame reference or dumping recon + * we will use the recon only for predicting intra MBs + * This will need only right and bottom edge 4x4 blocks recon + * Hence we selectively enable them using control signal(including DC) + */ + if(!ps_proc->u4_compute_recon) + { + u4_cntrl &= 0x7700C000; /* keeps ac bits 30-28 and 26-24 and the dc bits 15 and 14 */ + } + + isvce_chroma_8x8_idctrans_iquant_itrans_recon( + &s_quant_coeffs, &s_pred, &s_recon, &s_res, &s_res_pred, &s_iq_it_res_rec_constants, + ps_isa_dependent_fxns, ps_proc->pv_scratch_buff, u4_cntrl, + 
ps_proc->ps_mb_info->u1_residual_prediction_flag); + + memcpy(ps_proc->au1_chroma_nnz, au1_nnz, sizeof(ps_proc->au1_chroma_nnz)); + + return (u1_cbp_c); +} diff --git a/encoder/svc/isvce_core_coding.h b/encoder/svc/isvce_core_coding.h new file mode 100644 index 0000000..0d7405a --- /dev/null +++ b/encoder/svc/isvce_core_coding.h @@ -0,0 +1,125 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file +* isvce_core_coding.h +* +* @brief +* This file contains extern declarations of core coding routines +* +* @author +* ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef _ISVCE_CORE_CODING_H_ +#define _ISVCE_CORE_CODING_H_ + +#include "isvce_structs.h" + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** +* @brief Enable/Disable Hadamard transform of DC coefficients +****************************************************************************** +*/ +#define DISABLE_DC_TRANSFORM 0 +#define ENABLE_DC_TRANSFORM 1 + +/** +******************************************************************************* +* @brief block count/size constants and bit masks for DC and AC control flags +******************************************************************************* +*/ + +#define DC_COEFF_CNT_LUMA_MB 16 +#define NUM_4X4_BLKS_LUMA_MB_ROW 4 +#define NUM_LUMA4x4_BLOCKS_IN_MB 16 +#define NUM_CHROMA4x4_BLOCKS_IN_MB 8 + +#define SIZE_4X4_BLK_HRZ TRANS_SIZE_4 +#define SIZE_4X4_BLK_VERT TRANS_SIZE_4 + +#define CNTRL_FLAG_DC_MASK_LUMA 0x0000FFFF +#define CNTRL_FLAG_AC_MASK_LUMA 0xFFFF0000 + +#define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000 +#define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000 + +#define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000 +#define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00 + +#define CNTRL_FLAG_AC_MASK_CHROMA (CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V) +#define CNTRL_FLAG_DC_MASK_CHROMA (CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V) + +#define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000 + +/** +******************************************************************************* +* @brief
macros for transforms: dequeue the next block id from the control word (via CLZ) and map a block id to x/y offsets. NOTE(review): the IND2SUB_* arguments are expanded unparenthesized and DEQUEUE_BLKID_FROM_CONTROL ends in "};", so pass plain identifiers and do not use it as the body of an un-braced if/else +******************************************************************************* +*/ +#define DEQUEUE_BLKID_FROM_CONTROL(u4_cntrl, blk_lin_id) \ + { \ + blk_lin_id = CLZ(u4_cntrl); \ + u4_cntrl &= (0x7FFFFFFF >> blk_lin_id); \ + }; + +#define IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y) \ + { \ + i4_offset_x = (u4_blk_id % 4) << 2; \ + i4_offset_y = (u4_blk_id / 4) << 2; \ + } + +#define IS_V_BLK(u4_blk_id) ((u4_blk_id) > 3) + +#define IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y) \ + { \ + i4_offset_x = ((u4_blk_id & 0x1) << 3) + IS_V_BLK(u4_blk_id); \ + i4_offset_y = (u4_blk_id & 0x2) << 1; \ + } + +/* Typedefs */ + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + +extern FT_CORE_CODING isvce_code_luma_intra_macroblock_16x16; + +extern FT_CORE_CODING isvce_code_luma_intra_macroblock_4x4; + +extern FT_CORE_CODING isvce_code_luma_intra_macroblock_4x4_rdopt_on; + +extern FT_CORE_CODING isvce_code_chroma_intra_macroblock_8x8; + +extern FT_CORE_CODING isvce_code_luma_inter_macroblock_16x16; + +extern FT_CORE_CODING isvce_code_chroma_inter_macroblock_8x8; + +#endif diff --git a/encoder/svc/isvce_deblk.c b/encoder/svc/isvce_deblk.c new file mode 100644 index 0000000..2828123 --- /dev/null +++ b/encoder/svc/isvce_deblk.c @@ -0,0 +1,1267 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** + ******************************************************************************* + * @file + * isvce_deblk.c + * + * @brief + * This file contains functions that are associated with deblocking + * + * @author + * ittiam + * + * @par List of Functions: + * - isvce_fill_bs_1mv_1ref_non_mbaff + * - isvce_compute_bs + * - isvce_filter_top_edge + * - isvce_filter_left_edge + * - isvce_deblock_mb + * + * @remarks + * None + * + ******************************************************************************* + */ +#include +#include + +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvc_macros.h" +#include "isvc_defs.h" +#include "isvc_structs.h" +#include "ih264_trans_data.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "ih264_deblk_tables.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "isvce_deblk.h" +#include 
"isvce_globals.h" + +static const UWORD32 gau4_isvce_packed_bs2[(1 << MAX_TU_IN_MB_COL) * 2] = { + /* BS TABLES FOR NORMAL EDGES */ + 0x00000000, 0x02000000, 0x00020000, 0x02020000, 0x00000200, 0x02000200, 0x00020200, 0x02020200, + 0x00000002, 0x02000002, 0x00020002, 0x02020002, 0x00000202, 0x02000202, 0x00020202, 0x02020202, + + /* BS TABLES FOR XTRA LEFT MB EDGES IN MBAFF CASE */ + 0x01010101, 0x02010101, 0x01020101, 0x02020101, 0x01010201, 0x02010201, 0x01020201, 0x02020201, + 0x01010102, 0x02010102, 0x01020102, 0x02020102, 0x01010202, 0x02010202, 0x01020202, 0x02020202}; + +static const UWORD16 gau2_isvce_4x4_v2h_reorder[(1 << MAX_TU_IN_MB_COL)] = { + 0x0000, 0x0001, 0x0010, 0x0011, 0x0100, 0x0101, 0x0110, 0x0111, + 0x1000, 0x1001, 0x1010, 0x1011, 0x1100, 0x1101, 0x1110, 0x1111}; + +static void isvce_fill_bs1_16x16mb_pslice(isvce_mb_info_t *ps_cur_mb, isvce_mb_info_t *ps_top_mb, + isvce_mb_info_t *ps_left_mb, UWORD32 *pu4_bs_table, + coordinates_t *ps_mb_pos) +{ + WORD16 i2_q_mv0, i2_q_mv1; + WORD16 i2_p_mv0, i2_p_mv1; + UWORD32 i; + UWORD32 u4_bs_horz = pu4_bs_table[0]; + UWORD32 u4_bs_vert = pu4_bs_table[4]; + + i2_q_mv0 = ps_cur_mb->as_pu->as_me_info[L0].s_mv.i2_mvx; + i2_q_mv1 = ps_cur_mb->as_pu->as_me_info[L0].s_mv.i2_mvy; + + if(ps_mb_pos->i4_ordinate) + { + /* Computing Bs for the top edge */ + for(i = 0; i < 4; i++) + { + UWORD32 u4_idx = 24 - (i << 3); + + /* check if Bs is already set */ + if(!((u4_bs_horz >> u4_idx) & 0xf)) + { + /************************************************************/ + /* If Bs is not set, use top MB and current MB mvs and      */ + /* reference indices to evaluate Bs==1                      */ + /************************************************************/ + UWORD32 u4_bs_temp1; + UWORD32 u4_bs; + + /*********************************************************/ + /* If any motion vector component differs by more than 1 */ + /* integer pel or if reference pictures are different Bs */ + /* is set to 1.
Only the L0 motion vector and reference */ + /* index are compared in this P slice variant.           */ + /*********************************************************/ + i2_p_mv0 = ps_top_mb->as_pu->as_me_info[L0].s_mv.i2_mvx; + i2_p_mv1 = ps_top_mb->as_pu->as_me_info[L0].s_mv.i2_mvy; + + u4_bs_temp1 = + ((ABS((i2_p_mv0 - i2_q_mv0)) >= 4) || (ABS((i2_p_mv1 - i2_q_mv1)) >= 4)); + + u4_bs = ((ps_cur_mb->as_pu->as_me_info[L0].i1_ref_idx != + ps_top_mb->as_pu->as_me_info[L0].i1_ref_idx) || + u4_bs_temp1); + + u4_bs_horz |= (u4_bs << u4_idx); + } + } + + pu4_bs_table[0] = u4_bs_horz; + } + + if(ps_mb_pos->i4_abscissa) + { + /* Computing Bs for the left edge */ + for(i = 0; i < 4; i++) + { + UWORD32 u4_idx = 24 - (i << 3); + + /* check if Bs is already set */ + if(!((u4_bs_vert >> u4_idx) & 0xf)) + { + /* If Bs is not set, evaluate conditions for Bs=1 */ + UWORD32 u4_bs_temp1; + UWORD32 u4_bs; + /*********************************************************/ + /* If any motion vector component differs by more than 1 */ + /* integer pel or if reference pictures are different Bs */ + /* is set to 1.
Only the L0 motion vector and reference */ + /* index are compared in this P slice variant.           */ + /*********************************************************/ + + i2_p_mv0 = ps_left_mb->as_pu->as_me_info[L0].s_mv.i2_mvx; + i2_p_mv1 = ps_left_mb->as_pu->as_me_info[L0].s_mv.i2_mvy; + + u4_bs_temp1 = + ((ABS((i2_p_mv0 - i2_q_mv0)) >= 4) | (ABS((i2_p_mv1 - i2_q_mv1)) >= 4)); + + u4_bs = ((ps_cur_mb->as_pu->as_me_info[L0].i1_ref_idx != + ps_left_mb->as_pu->as_me_info[L0].i1_ref_idx) || + u4_bs_temp1); + + u4_bs_vert |= (u4_bs << u4_idx); + } + } + + pu4_bs_table[4] = u4_bs_vert; + } +} + +static void isvce_fill_bs1_16x16mb_bslice(isvce_mb_info_t *ps_cur_mb, isvce_mb_info_t *ps_top_mb, + isvce_mb_info_t *ps_left_mb, UWORD32 *pu4_bs_table, + coordinates_t *ps_mb_pos) +{ + WORD16 i2_q_mv0, i2_q_mv1, i2_q_mv2, i2_q_mv3; + WORD16 i2_p_mv0, i2_p_mv1, i2_p_mv2, i2_p_mv3; + UWORD32 i; + UWORD32 u4_bs_horz = pu4_bs_table[0]; + UWORD32 u4_bs_vert = pu4_bs_table[4]; + + i2_q_mv0 = ps_cur_mb->as_pu->as_me_info[L0].s_mv.i2_mvx; + i2_q_mv1 = ps_cur_mb->as_pu->as_me_info[L0].s_mv.i2_mvy; + i2_q_mv2 = ps_cur_mb->as_pu->as_me_info[L1].s_mv.i2_mvx; + i2_q_mv3 = ps_cur_mb->as_pu->as_me_info[L1].s_mv.i2_mvy; + + /* Computing Bs for the top edge */ + if(ps_mb_pos->i4_ordinate) + { + for(i = 0; i < 4; i++) + { + UWORD32 u4_idx = 24 - (i << 3); + + /* check if Bs is already set */ + if(!((u4_bs_horz >> u4_idx) & 0xf)) + { + /************************************************************/ + /* If Bs is not set, use top MB and current MB mvs and      */ + /* reference indices to evaluate Bs==1                      */ + /************************************************************/ + UWORD32 u4_bs_temp1, u4_bs_temp2; + UWORD32 u4_bs; + + /*********************************************************/ + /* If any motion vector component differs by more than 1 */ + /* integer pel or if reference pictures are different Bs */ + /* is set to 1.
Note that this condition shall be met for*/ + /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/ + /*********************************************************/ + i2_p_mv0 = ps_top_mb->as_pu->as_me_info[L0].s_mv.i2_mvx; + i2_p_mv1 = ps_top_mb->as_pu->as_me_info[L0].s_mv.i2_mvy; + i2_p_mv2 = ps_top_mb->as_pu->as_me_info[L1].s_mv.i2_mvx; + i2_p_mv3 = ps_top_mb->as_pu->as_me_info[L1].s_mv.i2_mvy; + + u4_bs_temp1 = + ((ABS((i2_p_mv0 - i2_q_mv0)) >= 4) | (ABS((i2_p_mv1 - i2_q_mv1)) >= 4) | + (ABS((i2_p_mv2 - i2_q_mv2)) >= 4) | (ABS((i2_p_mv3 - i2_q_mv3)) >= 4)); + + u4_bs_temp2 = + ((ABS((i2_p_mv0 - i2_q_mv2)) >= 4) | (ABS((i2_p_mv1 - i2_q_mv3)) >= 4) | + (ABS((i2_p_mv2 - i2_q_mv0)) >= 4) | (ABS((i2_p_mv3 - i2_q_mv1)) >= 4)); + + u4_bs = ((ps_cur_mb->as_pu->as_me_info[L0].i1_ref_idx != + ps_top_mb->as_pu->as_me_info[L0].i1_ref_idx) || + (ps_cur_mb->as_pu->as_me_info[L1].i1_ref_idx != + ps_top_mb->as_pu->as_me_info[L1].i1_ref_idx) || + u4_bs_temp1) && + ((ps_cur_mb->as_pu->as_me_info[L0].i1_ref_idx != + ps_top_mb->as_pu->as_me_info[L1].i1_ref_idx) || + (ps_cur_mb->as_pu->as_me_info[L1].i1_ref_idx != + ps_top_mb->as_pu->as_me_info[L0].i1_ref_idx) || + u4_bs_temp2); + + u4_bs_horz |= (u4_bs << u4_idx); + } + } + + pu4_bs_table[0] = u4_bs_horz; + } + + /* Computing Bs for the left edge */ + if(ps_mb_pos->i4_abscissa) + { + for(i = 0; i < 4; i++) + { + UWORD32 u4_idx = 24 - (i << 3); + + /* check if Bs is already set */ + if(!((u4_bs_vert >> u4_idx) & 0xf)) + { + /* If Bs is not set, evaluate conditions for Bs=1 */ + UWORD32 u4_bs_temp1, u4_bs_temp2; + UWORD32 u4_bs; + /*********************************************************/ + /* If any motion vector component differs by more than 1 */ + /* integer pel or if reference pictures are different Bs */ + /* is set to 1.
Note that this condition shall be met for*/ + /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/ + /*********************************************************/ + + i2_p_mv0 = ps_left_mb->as_pu->as_me_info[L0].s_mv.i2_mvx; + i2_p_mv1 = ps_left_mb->as_pu->as_me_info[L0].s_mv.i2_mvy; + i2_p_mv2 = ps_left_mb->as_pu->as_me_info[L1].s_mv.i2_mvx; + i2_p_mv3 = ps_left_mb->as_pu->as_me_info[L1].s_mv.i2_mvy; + + u4_bs_temp1 = + ((ABS((i2_p_mv0 - i2_q_mv0)) >= 4) | (ABS((i2_p_mv1 - i2_q_mv1)) >= 4) | + (ABS((i2_p_mv2 - i2_q_mv2)) >= 4) | (ABS((i2_p_mv3 - i2_q_mv3)) >= 4)); + + u4_bs_temp2 = + ((ABS((i2_p_mv0 - i2_q_mv2)) >= 4) | (ABS((i2_p_mv1 - i2_q_mv3)) >= 4) | + (ABS((i2_p_mv2 - i2_q_mv0)) >= 4) | (ABS((i2_p_mv3 - i2_q_mv1)) >= 4)); + + u4_bs = ((ps_cur_mb->as_pu->as_me_info[L0].i1_ref_idx != + ps_left_mb->as_pu->as_me_info[L0].i1_ref_idx) || + (ps_cur_mb->as_pu->as_me_info[L1].i1_ref_idx != + ps_left_mb->as_pu->as_me_info[L1].i1_ref_idx) || + u4_bs_temp1) && + ((ps_cur_mb->as_pu->as_me_info[L0].i1_ref_idx != + ps_left_mb->as_pu->as_me_info[L1].i1_ref_idx) || + (ps_cur_mb->as_pu->as_me_info[L1].i1_ref_idx != + ps_left_mb->as_pu->as_me_info[L0].i1_ref_idx) || + u4_bs_temp2); + + u4_bs_vert |= (u4_bs << u4_idx); + } + } + + pu4_bs_table[4] = u4_bs_vert; + } +} + +static void isvce_fill_bs2_horz_vert(UWORD32 *pu4_bs, WORD32 u4_left_mb_csbp, WORD32 u4_top_mb_csbp, + WORD32 u4_cur_mb_csbp, coordinates_t *ps_mb_pos, + const UWORD32 *pu4_packed_bs2, + const UWORD16 *pu2_4x4_v2h_reorder) +{ + UWORD32 u4_nbr_horz_csbp, u4_nbr_vert_csbp; + UWORD32 u4_horz_bs2_dec, u4_vert_bs2_dec; + UWORD32 u4_left_mb_masked_csbp, u4_cur_mb_masked_csbp; + + UWORD32 u4_reordered_vert_bs2_dec, u4_temp; + + WORD32 u4_cur_mb_csbp_seq = 0; + WORD32 u4_top_mb_csbp_seq = 0; + WORD32 u4_left_mb_csbp_seq = 0; + + /* Convert the csbp packed data in sequential pattern from raster order */ + u4_cur_mb_csbp_seq |= u4_cur_mb_csbp & 3; // 0 1 + u4_cur_mb_csbp >>= 2; + u4_cur_mb_csbp_seq |= (u4_cur_mb_csbp 
& 3) << 4; // 4 5 + u4_cur_mb_csbp >>= 2; + u4_cur_mb_csbp_seq |= (u4_cur_mb_csbp & 3) << 2; // 2 3 + u4_cur_mb_csbp >>= 2; + u4_cur_mb_csbp_seq |= (u4_cur_mb_csbp & 3) << 6; // 6 7 + u4_cur_mb_csbp >>= 2; + u4_cur_mb_csbp_seq |= (u4_cur_mb_csbp & 3) << 8; // 8 9 + u4_cur_mb_csbp >>= 2; + u4_cur_mb_csbp_seq |= (u4_cur_mb_csbp & 3) << 12; // 12 13 + u4_cur_mb_csbp >>= 2; + u4_cur_mb_csbp_seq |= (u4_cur_mb_csbp & 3) << 10; // 10 11 + u4_cur_mb_csbp >>= 2; + u4_cur_mb_csbp_seq |= (u4_cur_mb_csbp & 3) << 14; // 14 15 + + u4_left_mb_csbp_seq |= u4_left_mb_csbp & 3; // 0 1 + u4_left_mb_csbp >>= 2; + u4_left_mb_csbp_seq |= (u4_left_mb_csbp & 3) << 4; // 4 5 + u4_left_mb_csbp >>= 2; + u4_left_mb_csbp_seq |= (u4_left_mb_csbp & 3) << 2; // 2 3 + u4_left_mb_csbp >>= 2; + u4_left_mb_csbp_seq |= (u4_left_mb_csbp & 3) << 6; // 6 7 + u4_left_mb_csbp >>= 2; + u4_left_mb_csbp_seq |= (u4_left_mb_csbp & 3) << 8; // 8 9 + u4_left_mb_csbp >>= 2; + u4_left_mb_csbp_seq |= (u4_left_mb_csbp & 3) << 12; // 12 13 + u4_left_mb_csbp >>= 2; + u4_left_mb_csbp_seq |= (u4_left_mb_csbp & 3) << 10; // 10 11 + u4_left_mb_csbp >>= 2; + u4_left_mb_csbp_seq |= (u4_left_mb_csbp & 3) << 14; // 14 15 + + /* Required only the last row of top MB */ + u4_top_mb_csbp = u4_top_mb_csbp >> 10; // 12 13 + u4_top_mb_csbp_seq |= (u4_top_mb_csbp & 3); + u4_top_mb_csbp = u4_top_mb_csbp >> 4; // 14 15 + u4_top_mb_csbp_seq |= ((u4_top_mb_csbp & 3) << 2); + + /* u4_nbr_horz_csbp=11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C|15T|14T|13T|12T */ + u4_nbr_horz_csbp = (u4_cur_mb_csbp_seq << 4) | u4_top_mb_csbp_seq; + u4_horz_bs2_dec = u4_cur_mb_csbp_seq | u4_nbr_horz_csbp; + + /* u4_left_mb_masked_csbp = 15L|0|0|0|11L|0|0|0|7L|0|0|0|3L|0|0|0 */ + u4_left_mb_masked_csbp = u4_left_mb_csbp_seq & CSBP_RIGHT_BLOCK_MASK; + + /* u4_cur_mb_masked_csbp =14C|13C|12C|x|10C|9C|8C|x|6C|5C|4C|x|2C|1C|0C|x */ + u4_cur_mb_masked_csbp = (u4_cur_mb_csbp_seq << 1) & (~CSBP_LEFT_BLOCK_MASK); + + /* 
u4_nbr_vert_csbp=14C|13C|12C|15L|10C|9C|8C|11L|6C|5C|4C|7L|2C|1C|0C|3L */ + u4_nbr_vert_csbp = (u4_cur_mb_masked_csbp) | (u4_left_mb_masked_csbp >> 3); + + u4_vert_bs2_dec = u4_cur_mb_csbp_seq | u4_nbr_vert_csbp; + + /* Fill horz edges (0,1,2,3) boundary strengths 2 using look up table */ + if(ps_mb_pos->i4_ordinate) + { + pu4_bs[0] = pu4_packed_bs2[u4_horz_bs2_dec & 0xF]; + } + + pu4_bs[1] = pu4_packed_bs2[(u4_horz_bs2_dec >> 4) & 0xF]; + pu4_bs[2] = pu4_packed_bs2[(u4_horz_bs2_dec >> 8) & 0xF]; + pu4_bs[3] = pu4_packed_bs2[(u4_horz_bs2_dec >> 12) & 0xF]; + + /* Do 4x4 transpose of u4_vert_bs2_dec by using look up table for reorder */ + u4_reordered_vert_bs2_dec = pu2_4x4_v2h_reorder[u4_vert_bs2_dec & 0xF]; + u4_temp = pu2_4x4_v2h_reorder[(u4_vert_bs2_dec >> 4) & 0xF]; + u4_reordered_vert_bs2_dec |= (u4_temp << 1); + u4_temp = pu2_4x4_v2h_reorder[(u4_vert_bs2_dec >> 8) & 0xF]; + u4_reordered_vert_bs2_dec |= (u4_temp << 2); + u4_temp = pu2_4x4_v2h_reorder[(u4_vert_bs2_dec >> 12) & 0xF]; + u4_reordered_vert_bs2_dec |= (u4_temp << 3); + + /* Fill vert edges (4,5,6,7) boundary strengths 2 using look up table */ + if(ps_mb_pos->i4_abscissa) + { + pu4_bs[4] = pu4_packed_bs2[u4_reordered_vert_bs2_dec & 0xF]; + } + + pu4_bs[5] = pu4_packed_bs2[(u4_reordered_vert_bs2_dec >> 4) & 0xF]; + pu4_bs[6] = pu4_packed_bs2[(u4_reordered_vert_bs2_dec >> 8) & 0xF]; + pu4_bs[7] = pu4_packed_bs2[(u4_reordered_vert_bs2_dec >> 12) & 0xF]; +} + +/* brief Fills the BS for edges falling on an IBL (intra MB with base mode flag set) boundary */ +static void isvce_fill_bs_ibl(isvce_mb_info_t *ps_cur_mb, isvce_mb_info_t *ps_top_mb, + isvce_mb_info_t *ps_left_mb, UWORD32 *pu4_bs_table) +{ + /*! Flow of the module is as follows */ + /*! 1. checks if MB edge is falling on IBL boundary */ + /*! 2. if only Mb edge then it fills the BS based on INTRA or INTER + status */ + /*! 3. if the current MB is IBL and neighbours are also IBL + then it uses the current layer t_coeff flag to decide the + BS of a particular edge */ + /*! 4.
fills the BS for all the edges in current MB if IBL */ + + UWORD16 u2_top_horz_nnz; + UWORD8 u1_top_mb_ibl, u1_left_mb_ibl; + UWORD32 i4_i, i4_edge; + UWORD8 u1_bs; + UWORD8 u1_cnd; + UWORD8 u1_top_intra; + UWORD8 u1_left_intra; + UWORD8 u1_p_nnz, u1_q_nnz; + UWORD8 u1_curr_mb_ibl; + UWORD16 u2_curr_nnz; + UWORD8 u1_left_mb_nnz = 0, u1_left_nnz; + WORD32 i4_horz_start = 0; + WORD32 i4_vertical_start = 0; + + u1_top_mb_ibl = ps_top_mb ? (ps_top_mb->u1_base_mode_flag && ps_top_mb->u1_is_intra) : 0; + u1_left_mb_ibl = ps_left_mb ? (ps_left_mb->u1_base_mode_flag && ps_left_mb->u1_is_intra) : 0; + + u1_curr_mb_ibl = ps_cur_mb ? (ps_cur_mb->u1_base_mode_flag && ps_cur_mb->u1_is_intra) : 0; + + u1_top_intra = ps_top_mb ? ps_top_mb->u1_is_intra : 0; + u1_left_intra = ps_left_mb ? ps_left_mb->u1_is_intra : 0; + + /* return if none of the current, top and left MBs is IBL */ + if((0 == u1_curr_mb_ibl) && (0 == u1_top_mb_ibl) && (0 == u1_left_mb_ibl)) + { + return; + } + + /* set up the vertical and horz MB edge skip flags */ + if(0 != u1_curr_mb_ibl) + { + /* if top is not IBL */ + if(0 == u1_top_mb_ibl) + { + i4_horz_start = 1; + } + + /* if left is not IBL */ + if(0 == u1_left_mb_ibl) + { + i4_vertical_start = 1; + } + } + + /* Fill BS for mb edges assuming non IBL case */ + + /* only the MB edges fall across IBL boundary (this condition always holds after the early return above) */ + if((0 != u1_curr_mb_ibl) || (0 != u1_top_mb_ibl) || (0 != u1_left_mb_ibl)) + { + UWORD16 u2_temp, u2_i, u1_i; + u2_temp = ps_left_mb ? ps_left_mb->u4_res_csbp : 0; + for(u2_i = 0; u2_i < MAX_TU_IN_MB_COL; u2_i++) + { + UWORD8 u1_zscan_idx = gau1_raster_to_zscan_map[u2_i * 4 + MAX_TU_IN_MB_ROW - 1]; + u1_left_mb_nnz |= ((u2_temp & (1 << u1_zscan_idx)) ? 1 << u2_i : 0); + } + + u2_curr_nnz = ps_cur_mb->u4_res_csbp; + + u2_top_horz_nnz = 0; + if(ps_top_mb) + { + /* last row of top MB */ + for(u1_i = 12; u1_i < 16; u1_i++) + { + UWORD8 u1_zscan_idx = gau1_raster_to_zscan_map[u1_i]; + u2_top_horz_nnz |= + ((ps_top_mb->u4_res_csbp & (1 << u1_zscan_idx)) ?
1 << (u1_i - 12) : 0); + } + } + else + { + u2_top_horz_nnz = 0; + } + + /* top is intra (u1_is_intra is also set when top is IBL) */ + if(0 != u1_top_intra) + { + pu4_bs_table[0] = 0x04040404; + } + /* left is intra (u1_is_intra is also set when left is IBL) */ + if(0 != u1_left_intra) + { + pu4_bs_table[4] = 0x04040404; + } + + /* assume neighbours are inter and update bs */ + /* Edge = 0 means Vert Edges and Edge = 1 means Horz edges */ + for(i4_edge = 0; i4_edge < 2; i4_edge++) + { + UWORD8 u1_p_nnz = 0, u1_q_nnz = 0; + UWORD32 u4_bs_edge = 0; + WORD32 i4_bit_mask; + WORD32 i4_curr_intra_flag; + WORD32 i4_neibor_intra_flag; + + if(((1 == i4_horz_start) && (i4_edge == 1))) continue; + if(((1 == i4_vertical_start) && (i4_edge == 0))) continue; + + i4_curr_intra_flag = (0 != u1_curr_mb_ibl); + + if(0 != i4_edge) + { + /* initialize for the TOP edge */ + u1_p_nnz = (UWORD8) u2_top_horz_nnz; + for(i4_i = 0; i4_i < MAX_TU_IN_MB_ROW; i4_i++) + { + UWORD8 u1_zscan_idx = gau1_raster_to_zscan_map[i4_i]; + u1_q_nnz |= ((u2_curr_nnz & (1 << u1_zscan_idx)) ? (1 << i4_i) : 0); + } + + i4_neibor_intra_flag = (u1_top_mb_ibl || u1_top_intra); + } + else + { + u1_p_nnz = u1_left_mb_nnz; + for(u2_i = 0; u2_i < MAX_TU_IN_MB_COL; u2_i++) + { + UWORD8 u1_zscan_idx = gau1_raster_to_zscan_map[u2_i * 4]; + u1_q_nnz |= ((u2_curr_nnz & (1 << u1_zscan_idx)) ? 1 << u2_i : 0); + } + + i4_neibor_intra_flag = (u1_left_mb_ibl || u1_left_intra); + } + + i4_bit_mask = 1; + /* find bs of 4 edges */ + for(i4_i = 0; i4_i < 4; i4_i++) + { + UWORD8 u1_p_nnz_temp, u1_q_nnz_temp; + + u1_p_nnz_temp = (u1_p_nnz & i4_bit_mask); + u1_q_nnz_temp = (u1_q_nnz & i4_bit_mask); + + u1_cnd = ((u1_p_nnz_temp && (!i4_neibor_intra_flag)) || + (u1_q_nnz_temp && (!i4_curr_intra_flag))); + + u1_bs = u1_cnd ?
2 : 1; + + /* update the bs of the edge */ + u4_bs_edge = (u4_bs_edge << 8) + u1_bs; + i4_bit_mask <<= 1; + + } /* end of loop over blk edges */ + + /* update the bs of edges */ + if(i4_edge && !u1_top_intra) + { + pu4_bs_table[0] = u4_bs_edge; + } + else if(!i4_edge && !u1_left_intra) + { + pu4_bs_table[4] = u4_bs_edge; + } + } /* end of loop over the vertical and horizontal MB edge */ + } + + /* current MB is IBL */ + if(0 != u1_curr_mb_ibl) + { + WORD32 i4_bit_mask_edge = 1; + UWORD16 u2_temp, u2_i, u1_i; + + u1_left_mb_nnz = 0; + u2_temp = ps_left_mb ? ps_left_mb->u4_csbp : 0; + for(u2_i = 0; u2_i < MAX_TU_IN_MB_COL; u2_i++) + { + UWORD8 u1_zscan_idx = gau1_raster_to_zscan_map[u2_i * 4 + MAX_TU_IN_MB_ROW - 1]; + u1_left_mb_nnz |= ((u2_temp & (1 << u1_zscan_idx)) ? 1 << u2_i : 0); + } + + u2_curr_nnz = ps_cur_mb->u4_csbp; + + u2_top_horz_nnz = 0; + if(ps_top_mb) + { + for(u1_i = 12; u1_i < 16; u1_i++) + { + UWORD8 u1_zscan_idx = gau1_raster_to_zscan_map[u1_i]; + u2_top_horz_nnz |= + ((ps_top_mb->u4_csbp & (1 << u1_zscan_idx)) ? 1 << (u1_i - 12) : 0); + } + } + else + { + u2_top_horz_nnz = 0; + } + + /* all are IBL edges, so use only the t_coeff (u4_csbp) of the current layer */ + /* loop over all edges */ + for(i4_edge = 0; i4_edge < 4; i4_edge++) + { + UWORD16 u2_curr_horz_nnz = 0; + WORD32 i4_bit_mask = 1; + + u1_left_nnz = (u1_left_mb_nnz & i4_bit_mask_edge); + + for(i4_i = 0; i4_i < 4; i4_i++) + { + UWORD8 u1_curr_nnz, u1_top_nnz; + UWORD8 u1_zscan_idx = gau1_raster_to_zscan_map[(4 * i4_edge) + i4_i]; + + u2_curr_horz_nnz |= ((ps_cur_mb->u4_csbp & (1 << u1_zscan_idx)) ? (1 << i4_i) : 0); + u1_curr_nnz = (u2_curr_horz_nnz & i4_bit_mask); + u1_top_nnz = (u2_top_horz_nnz & i4_bit_mask); + + /* update bs horizontal */ + if(!((1 == i4_horz_start) && (0 == i4_edge))) + { + u1_p_nnz = u1_top_nnz; + u1_q_nnz = u1_curr_nnz; + u1_cnd = !(u1_p_nnz || u1_q_nnz); + u1_bs = u1_cnd ?
0 : 1; + pu4_bs_table[i4_edge] = (pu4_bs_table[i4_edge] << 8) + u1_bs; + } + + /* update bs vertical */ + if(!((1 == i4_vertical_start) && (0 == i4_i))) + { + u1_p_nnz = u1_left_nnz; + u1_q_nnz = u1_curr_nnz; + u1_cnd = !(u1_p_nnz || u1_q_nnz); + u1_bs = u1_cnd ? 0 : 1; + pu4_bs_table[i4_i + 4] = (pu4_bs_table[i4_i + 4] << 8) + u1_bs; + } + /* store the current nnz to left nnz */ + u1_left_nnz = u1_curr_nnz; + i4_bit_mask <<= 1; + } + /* store the current row nnz to top row nnz */ + u2_top_horz_nnz = u2_curr_horz_nnz; + i4_bit_mask_edge <<= 1; + } + } +} + +void isvce_compute_bs(isvce_process_ctxt_t *ps_proc, UWORD8 u1_inter_layer_deblk_flag) +{ + coordinates_t s_mb_pos; + + UWORD32 *pu4_pic_vert_bs; + UWORD32 *pu4_pic_horz_bs; + + isvce_bs_ctxt_t *ps_bs = &(ps_proc->s_deblk_ctxt.s_bs_ctxt); + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + nbr_info_t *ps_nbr_info = &ps_proc->s_nbr_info; + isvce_mb_info_t *ps_left_mb = ps_ngbr_avbl->u1_mb_a ? ps_nbr_info->ps_left_mb_info : NULL; + isvce_mb_info_t *ps_top_mb = + ps_ngbr_avbl->u1_mb_b ? &ps_nbr_info->ps_top_row_mb_info[ps_bs->i4_mb_x] : NULL; + isvce_mb_info_t *ps_cur_mb = ps_proc->ps_mb_info; + + UWORD32 u1_left_mb_intra, u1_left_mb_ibl; + + UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp; + + UWORD32 u4_cur_mb_intra, u1_top_mb_intra, u4_cur_mb_fld; + UWORD32 u4_cur_mb_ibl, u1_top_mb_ibl; + UWORD32 au4_bs_table[8]; + UWORD32 *pu4_bs_table; + + u4_cur_mb_intra = ps_cur_mb->u1_is_intra; + u4_cur_mb_ibl = ps_cur_mb->u1_base_mode_flag && ps_cur_mb->u1_is_intra; + u4_cur_mb_fld = 0; + + u1_top_mb_intra = ps_top_mb ? ps_top_mb->u1_is_intra : 0; + u1_top_mb_ibl = ps_top_mb ? (ps_top_mb->u1_base_mode_flag && ps_top_mb->u1_is_intra) : 0; + + u1_left_mb_intra = ps_left_mb ? ps_left_mb->u1_is_intra : 0; + u1_left_mb_ibl = ps_left_mb ? 
(ps_left_mb->u1_base_mode_flag && ps_left_mb->u1_is_intra) : 0; + + pu4_bs_table = au4_bs_table; + memset(pu4_bs_table, 0, sizeof(pu4_bs_table[0]) * NUM_EDGES_IN_MB * 2); + + s_mb_pos.i4_abscissa = ps_bs->i4_mb_x; + s_mb_pos.i4_ordinate = ps_bs->i4_mb_y; + + if(!u1_inter_layer_deblk_flag) + { + pu4_pic_vert_bs = + ps_bs->pu4_pic_vert_bs + + ((s_mb_pos.i4_ordinate * ps_proc->i4_wd_mbs) + s_mb_pos.i4_abscissa) * NUM_EDGES_IN_MB; + pu4_pic_horz_bs = + ps_bs->pu4_pic_horz_bs + + ((s_mb_pos.i4_ordinate * ps_proc->i4_wd_mbs) + s_mb_pos.i4_abscissa) * NUM_EDGES_IN_MB; + } + else + { + pu4_pic_vert_bs = + ps_bs->pu4_intra_base_vert_bs + + ((s_mb_pos.i4_ordinate * ps_proc->i4_wd_mbs) + s_mb_pos.i4_abscissa) * NUM_EDGES_IN_MB; + pu4_pic_horz_bs = + ps_bs->pu4_intra_base_horz_bs + + ((s_mb_pos.i4_ordinate * ps_proc->i4_wd_mbs) + s_mb_pos.i4_abscissa) * NUM_EDGES_IN_MB; + } + + if(u4_cur_mb_intra && !(u4_cur_mb_ibl)) + { + pu4_bs_table[4] = ps_bs->i4_mb_x ? 0x04040404 : 0; + pu4_bs_table[0] = ps_bs->i4_mb_y ? 0x04040404 : 0; + pu4_bs_table[1] = 0x03030303; + pu4_bs_table[2] = 0x03030303; + pu4_bs_table[3] = 0x03030303; + pu4_bs_table[5] = 0x03030303; + pu4_bs_table[6] = 0x03030303; + pu4_bs_table[7] = 0x03030303; + } + else + { + isvce_fill_bs_ibl(ps_cur_mb, ps_top_mb, ps_left_mb, pu4_bs_table); + + if(!u4_cur_mb_ibl) + { + UWORD32 u4_bs_0, u4_bs_4; + + UWORD32 u4_is_b = (ps_proc->i4_slice_type == BSLICE); + + u2_cur_csbp = ps_cur_mb->u4_csbp; + u2_left_csbp = ps_left_mb ? ps_left_mb->u4_csbp : 0; + u2_top_csbp = ps_top_mb ? ps_top_mb->u4_csbp : 0; + + u2_cur_csbp |= (ps_cur_mb->u4_res_csbp); + u2_left_csbp |= ps_left_mb ? ps_left_mb->u4_res_csbp : 0; + u2_top_csbp |= ps_top_mb ? 
ps_top_mb->u4_res_csbp : 0; + + u4_bs_0 = pu4_bs_table[0]; + u4_bs_4 = pu4_bs_table[4]; + + isvce_fill_bs2_horz_vert(pu4_bs_table, u2_left_csbp, u2_top_csbp, u2_cur_csbp, + &s_mb_pos, (gau4_isvce_packed_bs2), + (gau2_isvce_4x4_v2h_reorder)); + + if(u1_left_mb_intra) + { + pu4_bs_table[4] = 0x04040404; + } + else if(u1_left_mb_ibl) + { + pu4_bs_table[4] = u4_bs_4; + } + + if(u1_top_mb_intra) + { + pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404; + } + else if(u1_top_mb_ibl) + { + pu4_bs_table[0] = u4_bs_0; + } + + if(!u4_is_b) + { + isvce_fill_bs1_16x16mb_pslice(ps_cur_mb, ps_top_mb, ps_left_mb, pu4_bs_table, + &s_mb_pos); + } + else + { + isvce_fill_bs1_16x16mb_bslice(ps_cur_mb, ps_top_mb, ps_left_mb, pu4_bs_table, + &s_mb_pos); + } + } + } + + pu4_pic_horz_bs[0] = pu4_bs_table[0]; + pu4_pic_horz_bs[1] = pu4_bs_table[1]; + pu4_pic_horz_bs[2] = pu4_bs_table[2]; + pu4_pic_horz_bs[3] = pu4_bs_table[3]; + + pu4_pic_vert_bs[0] = pu4_bs_table[4]; + pu4_pic_vert_bs[1] = pu4_bs_table[5]; + pu4_pic_vert_bs[2] = pu4_bs_table[6]; + pu4_pic_vert_bs[3] = pu4_bs_table[7]; +} + +/** +******************************************************************************* +* +* @brief This function performs deblocking of top horizontal edge +* +* @par Description: +* This function performs deblocking of top horizontal edge +* +* @param[in] ps_codec +* pointer to codec context +* +* @param[in] u1_qp_p +* qp of one MB adjoining the edge ('p'); combined as (u1_qp_p + u1_qp_q + 1) >> 1 +* +* @param[in] u1_qp_q +* qp of the other MB adjoining the edge ('q') +* +* @param[in] pu1_cur_pic_luma, i4_luma_stride +* pointer to recon buffer luma and its stride +* +* @param[in] pu1_cur_pic_chroma, i4_chroma_stride +* pointer to recon buffer chroma and its stride +* +* @param[in] pu4_pic_horz_bs +* pointer to horizontal boundary strength; only element [0] is read +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +static void isvce_filter_top_edge(isvce_codec_t *ps_codec, UWORD8 u1_qp_p, UWORD8 u1_qp_q, + UWORD8 *pu1_cur_pic_luma, WORD32 i4_luma_stride, + UWORD8
*pu1_cur_pic_chroma, WORD32 i4_chroma_stride, + UWORD32 *pu4_pic_horz_bs) +{ + UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma; + UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma; + + /********/ + /* luma */ + /********/ + u4_qp_luma = (u1_qp_p + u1_qp_q + 1) >> 1; + + /* filter offset A and filter offset B have to be received from slice header + */ + /* TODO : for now lets set these offsets as zero */ + + u4_idx_A_luma = MIN(51, u4_qp_luma + 0); + u4_idx_B_luma = MIN(51, u4_qp_luma + 0); + + /* alpha, beta computation */ + u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma]; + u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma]; + + /**********/ + /* chroma */ + /**********/ + u4_qp_chroma = (gu1_qpc_fqpi[u1_qp_p] + gu1_qpc_fqpi[u1_qp_q] + 1) >> 1; + + /* filter offset A and filter offset B have to be received from slice header + */ + /* TODO : for now lets set these offsets as zero */ + + u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0); + u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0); + + /* alpha, beta computation */ + u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma]; + u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma]; + + /* deblk edge */ + /* top Horizontal edge - allowed to be deblocked ? 
*/ + if(pu4_pic_horz_bs[0] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma, i4_luma_stride, u4_alpha_luma, + u4_beta_luma); + ps_codec->pf_deblk_chroma_horz_bs4(pu1_cur_pic_chroma, i4_chroma_stride, u4_alpha_chroma, + u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma, i4_luma_stride, u4_alpha_luma, + u4_beta_luma, pu4_pic_horz_bs[0], + gu1_ih264_clip_table[u4_idx_A_luma]); + + ps_codec->pf_deblk_chroma_horz_bslt4( + pu1_cur_pic_chroma, i4_chroma_stride, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, + u4_beta_chroma, pu4_pic_horz_bs[0], gu1_ih264_clip_table[u4_idx_A_chroma], + gu1_ih264_clip_table[u4_idx_A_chroma]); + } +} + +/** +******************************************************************************* +* +* @brief This function performs deblocking of left vertical edge +* +* @par Description: +* This function performs deblocking of the left vertical edge of an mb, +* for both luma and chroma +* +* @param[in] ps_codec +* pointer to codec context +* +* @param[in] u1_qp_p +* luma qp on the left side of the edge (the caller passes the left mb qp) +* +* @param[in] u1_qp_q +* luma qp on the right side of the edge (the caller passes the current mb qp) +* +* @param[in] pu1_cur_pic_luma +* pointer to recon buffer luma +* +* @param[in] i4_luma_stride +* stride of the luma recon buffer +* +* @param[in] pu1_cur_pic_chroma +* pointer to recon buffer chroma +* +* @param[in] i4_chroma_stride +* stride of the chroma recon buffer +* +* @param[in] pu4_pic_vert_bs +* pointer to vertical blocking strength; only entry 0 is read +* +* @returns none +* +* @remarks The qp used for the edge is the rounded average of u1_qp_p and +* u1_qp_q; for chroma both qps are mapped through gu1_qpc_fqpi before +* averaging. Filter offsets A and B are taken as zero. The strong (bs4) +* filters are used only when pu4_pic_vert_bs[0] equals 0x04040404. +* +******************************************************************************* +*/ +static void isvce_filter_left_edge(isvce_codec_t *ps_codec, UWORD8 u1_qp_p, UWORD8 u1_qp_q, + UWORD8 *pu1_cur_pic_luma, WORD32 i4_luma_stride, + UWORD8 *pu1_cur_pic_chroma, WORD32 i4_chroma_stride, + UWORD32 *pu4_pic_vert_bs) +{ + UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma; + UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma; + + /********/ + /* luma */ + /********/ + u4_qp_luma = (u1_qp_p + u1_qp_q + 1)
>> 1; + + /* filter offset A and filter offset B have to be received from slice header + */ + /* TODO : for now lets set these offsets as zero */ + + u4_idx_A_luma = MIN(51, u4_qp_luma + 0); + u4_idx_B_luma = MIN(51, u4_qp_luma + 0); + + /* alpha, beta computation */ + u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma]; + u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma]; + + /**********/ + /* chroma */ + /**********/ + u4_qp_chroma = (gu1_qpc_fqpi[u1_qp_p] + gu1_qpc_fqpi[u1_qp_q] + 1) >> 1; + + /* filter offset A and filter offset B have to be received from slice header + */ + /* TODO : for now lets set these offsets as zero */ + + u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0); + u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0); + + /* alpha, beta computation */ + u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma]; + u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma]; + + /* deblk edge */ + if(pu4_pic_vert_bs[0] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma, i4_luma_stride, u4_alpha_luma, + u4_beta_luma); + ps_codec->pf_deblk_chroma_vert_bs4(pu1_cur_pic_chroma, i4_chroma_stride, u4_alpha_chroma, + u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma, i4_luma_stride, u4_alpha_luma, + u4_beta_luma, pu4_pic_vert_bs[0], + gu1_ih264_clip_table[u4_idx_A_luma]); + + ps_codec->pf_deblk_chroma_vert_bslt4( + pu1_cur_pic_chroma, i4_chroma_stride, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, + u4_beta_chroma, pu4_pic_vert_bs[0], gu1_ih264_clip_table[u4_idx_A_chroma], + gu1_ih264_clip_table[u4_idx_A_chroma]); + } +} + +static UWORD8 isvce_get_deblk_mb_qp(isvce_process_ctxt_t *ps_proc, coordinates_t *ps_mb_pos) +{ + UWORD8 u1_mb_qp; + + isvce_deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; + isvce_bs_ctxt_t *ps_bs_ctxt = &ps_deblk->s_bs_ctxt; + coordinates_t s_cur_mb_pos = {ps_deblk->i4_mb_x, ps_deblk->i4_mb_y}; + + UWORD32 u4_mb_idx 
= ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * ps_proc->i4_wd_mbs; + + if((s_cur_mb_pos.i4_abscissa != ps_mb_pos->i4_abscissa) || + (s_cur_mb_pos.i4_ordinate != ps_mb_pos->i4_ordinate)) + { + u1_mb_qp = ps_bs_ctxt->pu1_pic_qp[u4_mb_idx]; /* not the mb currently being deblocked: read the qp recorded in the picture level qp buffer */ + } + else + { + isvce_mb_info_t *ps_mb_info = + ps_proc->ps_cur_mv_buf->ps_svc_layer_data[ps_proc->u1_spatial_layer_id].ps_mb_info + + u4_mb_idx; + + if((0 == ps_mb_pos->i4_abscissa) && (0 == ps_mb_pos->i4_ordinate)) + { + u1_mb_qp = ps_mb_info->u1_mb_qp; /* mb (0, 0): there is no preceding entry to fall back on */ + } + else + { + if((ps_mb_info->u4_cbp > 0) || (I16x16 == ps_mb_info->u2_mb_type)) + { + u1_mb_qp = ps_mb_info->u1_mb_qp; + } + else + { + u1_mb_qp = ps_bs_ctxt->pu1_pic_qp[u4_mb_idx - 1]; /* zero cbp and not I16x16: reuse the qp recorded for the preceding mb index (presumably because no qp delta is coded for such an mb - TODO confirm) */ + } + } + } + + return u1_mb_qp; +} + +/** +******************************************************************************* +* +* @brief This function performs deblocking on an mb +* +* @par Description: +* This function performs deblocking on an mb. The four vertical edges are +* filtered first, followed by the four horizontal edges. +* +* @param[in] ps_proc +* process context corresponding to the job +* +* @param[in] ps_deblk +* pointer to deblock context +* +* @param[in] u1_inter_layer_deblk_flag +* when non-zero, the intra base boundary strength buffers +* (pu4_intra_base_vert_bs, pu4_intra_base_horz_bs) are used instead of +* the picture level ones (pu4_pic_vert_bs, pu4_pic_horz_bs) +* +* @returns none +* +* @remarks The qp derived for the current mb is written to pu1_pic_qp. +* In slice mode, the left and top edges of mbs that lie on a slice boundary +* are not deblocked. +* +******************************************************************************* +*/ +void isvce_deblock_mb(isvce_process_ctxt_t *ps_proc, isvce_deblk_ctxt_t *ps_deblk, + UWORD8 u1_inter_layer_deblk_flag) +{ + UWORD8 u1_mb_a, u1_mb_b; + UWORD32 *pu4_pic_vert_bs; + UWORD32 *pu4_pic_horz_bs; + UWORD8 u1_cur_mb_qp; + UWORD8 u1_left_mb_qp; + UWORD8 u1_top_mb_qp; + UWORD32 u4_alpha_luma, u4_beta_luma, u4_idx_A_luma, u4_idx_B_luma; + UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + coordinates_t s_cur_mb_pos = {ps_deblk->i4_mb_x, ps_deblk->i4_mb_y}; + coordinates_t s_left_mb_pos = {ps_deblk->i4_mb_x - 1, ps_deblk->i4_mb_y}; + coordinates_t s_top_mb_pos = {ps_deblk->i4_mb_x, ps_deblk->i4_mb_y - 1}; + + WORD32 i4_mb_x = ps_deblk->i4_mb_x, i4_mb_y = ps_deblk->i4_mb_y; + WORD32 i4_luma_stride =
ps_deblk->s_rec_pic_buf_props.as_component_bufs[0].i4_data_stride; + UWORD8 *pu1_cur_pic_luma = + (UWORD8 *) (ps_deblk->s_rec_pic_buf_props.as_component_bufs[0].pv_data) + + (i4_mb_x * MB_SIZE) + ((i4_mb_y * MB_SIZE) * i4_luma_stride); + WORD32 i4_chroma_stride = ps_deblk->s_rec_pic_buf_props.as_component_bufs[1].i4_data_stride; + UWORD8 *pu1_cur_pic_chroma = + (UWORD8 *) (ps_deblk->s_rec_pic_buf_props.as_component_bufs[1].pv_data) + + (i4_mb_x * MB_SIZE) + (i4_mb_y * (MB_SIZE / 2) * i4_chroma_stride); + UWORD32 push_ptr = (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x; + + if(!u1_inter_layer_deblk_flag) + { + pu4_pic_vert_bs = ps_deblk->s_bs_ctxt.pu4_pic_vert_bs; + pu4_pic_horz_bs = ps_deblk->s_bs_ctxt.pu4_pic_horz_bs; + } + else + { + pu4_pic_vert_bs = ps_deblk->s_bs_ctxt.pu4_intra_base_vert_bs; + pu4_pic_horz_bs = ps_deblk->s_bs_ctxt.pu4_intra_base_horz_bs; + } + + /* derive neighbor availability */ + /* In slice mode the edges of mbs that lie on the slice boundary are not + * deblocked */ + /* deblocking filter idc '2' */ + if(ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE) + { + /* slice index */ + UWORD8 *pu1_slice_idx = ps_deblk->pu1_slice_idx; + + pu1_slice_idx += (i4_mb_y * ps_proc->i4_wd_mbs); + /* left macroblock availability */ + u1_mb_a = (i4_mb_x == 0 || (pu1_slice_idx[i4_mb_x - 1] != pu1_slice_idx[i4_mb_x])) ? 0 : 1; + /* top macroblock availability */ + u1_mb_b = (i4_mb_y == 0 || + (pu1_slice_idx[i4_mb_x - ps_proc->i4_wd_mbs] != pu1_slice_idx[i4_mb_x])) + ? 0 + : 1; + } + else + { + /* left macroblock availability */ + u1_mb_a = (i4_mb_x == 0) ? 0 : 1; + /* top macroblock availability */ + u1_mb_b = (i4_mb_y == 0) ? 
0 : 1; + } + + pu4_pic_vert_bs += push_ptr * NUM_EDGES_IN_MB; + pu4_pic_horz_bs += push_ptr * NUM_EDGES_IN_MB; + + /********/ + /* luma */ + /********/ + u1_cur_mb_qp = isvce_get_deblk_mb_qp(ps_proc, &s_cur_mb_pos); + ps_deblk->s_bs_ctxt.pu1_pic_qp[push_ptr] = u1_cur_mb_qp; + + /* filter offset A and filter offset B have to be received from slice header + */ + /* TODO : for now lets set these offsets as zero */ + + u4_idx_A_luma = MIN(51, u1_cur_mb_qp + 0); + u4_idx_B_luma = MIN(51, u1_cur_mb_qp + 0); + + /* alpha, beta computation */ + u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma]; + u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma]; + + /**********/ + /* chroma */ + /**********/ + u4_qp_chroma = gu1_qpc_fqpi[u1_cur_mb_qp]; + + /* filter offset A and filter offset B have to be received from slice header + */ + /* TODO : for now lets set these offsets as zero */ + + u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0); + u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0); + + /* alpha, beta computation */ + u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma]; + u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma]; + + /* Deblock vertical edges */ + /* left vertical edge 0 - allowed to be deblocked ? 
*/ + if(u1_mb_a) + { + u1_left_mb_qp = isvce_get_deblk_mb_qp(ps_proc, &s_left_mb_pos); + + isvce_filter_left_edge(ps_codec, u1_left_mb_qp, u1_cur_mb_qp, pu1_cur_pic_luma, + i4_luma_stride, pu1_cur_pic_chroma, i4_chroma_stride, + pu4_pic_vert_bs); + } + + /* vertical edge 1 */ + if(pu4_pic_vert_bs[1] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 4, i4_luma_stride, u4_alpha_luma, + u4_beta_luma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 4, i4_luma_stride, u4_alpha_luma, + u4_beta_luma, pu4_pic_vert_bs[1], + gu1_ih264_clip_table[u4_idx_A_luma]); + } + + /* vertical edge 2 */ + if(pu4_pic_vert_bs[2] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 8, i4_luma_stride, u4_alpha_luma, + u4_beta_luma); + ps_codec->pf_deblk_chroma_vert_bs4(pu1_cur_pic_chroma + 8, i4_chroma_stride, + u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, + u4_beta_chroma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 8, i4_luma_stride, u4_alpha_luma, + u4_beta_luma, pu4_pic_vert_bs[2], + gu1_ih264_clip_table[u4_idx_A_luma]); + + ps_codec->pf_deblk_chroma_vert_bslt4( + pu1_cur_pic_chroma + 8, i4_chroma_stride, u4_alpha_chroma, u4_beta_chroma, + u4_alpha_chroma, u4_beta_chroma, pu4_pic_vert_bs[2], + gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]); + } + + /* vertical edge 3 */ + if(pu4_pic_vert_bs[3] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 12, i4_luma_stride, u4_alpha_luma, + u4_beta_luma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 12, i4_luma_stride, u4_alpha_luma, + u4_beta_luma, pu4_pic_vert_bs[3], + gu1_ih264_clip_table[u4_idx_A_luma]); + } + + /* Deblock Horizontal edges */ + /* Horizontal edge 0 */ + if(u1_mb_b) + { + u1_top_mb_qp = isvce_get_deblk_mb_qp(ps_proc, 
&s_top_mb_pos); + + isvce_filter_top_edge(ps_codec, u1_top_mb_qp, u1_cur_mb_qp, pu1_cur_pic_luma, + i4_luma_stride, pu1_cur_pic_chroma, i4_chroma_stride, + pu4_pic_horz_bs); + } + + /* horizontal edge 1 */ + if(pu4_pic_horz_bs[1] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 4 * i4_luma_stride, i4_luma_stride, + u4_alpha_luma, u4_beta_luma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 4 * i4_luma_stride, i4_luma_stride, + u4_alpha_luma, u4_beta_luma, pu4_pic_horz_bs[1], + gu1_ih264_clip_table[u4_idx_A_luma]); + } + + /* horizontal edge 2 */ + if(pu4_pic_horz_bs[2] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 8 * i4_luma_stride, i4_luma_stride, + u4_alpha_luma, u4_beta_luma); + ps_codec->pf_deblk_chroma_horz_bs4(pu1_cur_pic_chroma + 4 * i4_chroma_stride, + i4_chroma_stride, u4_alpha_chroma, u4_beta_chroma, + u4_alpha_chroma, u4_beta_chroma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 8 * i4_luma_stride, i4_luma_stride, + u4_alpha_luma, u4_beta_luma, pu4_pic_horz_bs[2], + gu1_ih264_clip_table[u4_idx_A_luma]); + + ps_codec->pf_deblk_chroma_horz_bslt4( + pu1_cur_pic_chroma + 4 * i4_chroma_stride, i4_chroma_stride, u4_alpha_chroma, + u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_horz_bs[2], + gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]); + } + + /* horizontal edge 3 */ + if(pu4_pic_horz_bs[3] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 12 * i4_luma_stride, i4_luma_stride, + u4_alpha_luma, u4_beta_luma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 12 * i4_luma_stride, i4_luma_stride, + u4_alpha_luma, u4_beta_luma, pu4_pic_horz_bs[3], + gu1_ih264_clip_table[u4_idx_A_luma]); + } +} diff --git a/encoder/svc/isvce_deblk.h 
b/encoder/svc/isvce_deblk.h new file mode 100644 index 0000000..4772b00 --- /dev/null +++ b/encoder/svc/isvce_deblk.h @@ -0,0 +1,53 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file +* isvce_deblk.h +* +* @brief +* This file contains extern declarations of deblocking routines +* +* @author +* ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef _ISVCE_DEBLK_H_ +#define _ISVCE_DEBLK_H_ + +#include "ih264_typedefs.h" +#include "isvce_structs.h" + +#define CSBP_LEFT_BLOCK_MASK 0x1111 +#define CSBP_RIGHT_BLOCK_MASK 0x8888 + +#define NUM_EDGES_IN_MB 4 + +extern void isvce_compute_bs(isvce_process_ctxt_t *ps_proc, UWORD8 u1_inter_layer_deblk_flag); + +extern void isvce_deblock_mb(isvce_process_ctxt_t *ps_proc, isvce_deblk_ctxt_t *ps_deblk, + UWORD8 u1_inter_layer_deblk_flag); + +#endif diff --git a/encoder/svc/isvce_defs.h b/encoder/svc/isvce_defs.h new file mode 100644 index 0000000..c277abd --- /dev/null +++ b/encoder/svc/isvce_defs.h @@ -0,0 +1,345 @@ 
+/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvce_defs.h +* +* @brief +* Definitions used in the encoder +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_DEFS_H_ +#define _ISVCE_DEFS_H_ + +#include "ih264e_defs.h" + +#define SVC_MAX_NUM_BFRAMES 0 + +#define DEFAULT_INIT_QP 1 + +#define SVC_MAX_NUM_INP_FRAMES ((SVC_MAX_NUM_BFRAMES) + 2) + +#define LOG2_MAX_FRAME_NUM_MINUS4 12 + +#define ENC_MAX_PU_IN_MB ((MB_SIZE / ENC_MIN_PU_SIZE) * (MB_SIZE / ENC_MIN_PU_SIZE)) + +#define MAX_REF_FRAMES_PER_PRED_DIR 1 + +#define SVC_MAX_SLICE_HDR_CNT 1 + +#define MAX_LAYER_REFERENCE_PICS 1 + +#define ENABLE_RESIDUAL_PREDICTION 1 + +#define ENABLE_ILP_MV 1 + +#define USE_ILP_MV_IN_ME (1 && (ENABLE_ILP_MV)) + +#define USE_ILP_MV_AS_MVP (1 && (ENABLE_ILP_MV)) + +#define MAX_MVP_IDX (USE_ILP_MV_AS_MVP ? 
1 : 0) + +#define ENABLE_IBL_MODE 1 + +#define ENABLE_INTRA_BASE_DEBLOCK (0 && (ENABLE_IBL_MODE)) + +#define ENABLE_MODE_STAT_VISUALISER 0 + +#define FORCE_FAST_INTRA4X4 0 + +#define FORCE_DISTORTION_BASED_INTRA_4X4_GATING 1 + +#define ENABLE_INTRA16X16_BASED_INTRA4X4_GATING 0 + +#define ENABLE_ILP_BASED_INTRA4X4_GATING 0 + +#define DISABLE_POST_ENC_SKIP 1 + +#define ENABLE_RE_ENC_AS_SKIP 1 + +#define MAX_ILP_MV_IN_NBR_RGN 4 + +/* L, T, TL, TR, Zero, Skip, 'Temporal Skip', ILP */ +#define MAX_FPEL_SEARCH_CANDIDATES (7 + MAX_PU_IN_MB + MAX_ILP_MV_IN_NBR_RGN) + +#define NUM_SVCE_RC_MEMTABS 45 + +#define SVCE_MAX_INP_DIM 1920 + +#define SVCE_MAX_INP_FRAME_SIZE (1920 * 1088) + +/** + *************************************************************************** + * Enum to hold various mem records being requested + **************************************************************************** + */ +typedef enum ISVCE_MEMREC_TYPES_T +{ + /** + * Codec Object at API level + */ + ISVCE_MEM_REC_IV_OBJ, + + /** + * Codec context + */ + ISVCE_MEM_REC_CODEC, + + /** + * Cabac context + */ + ISVCE_MEM_REC_CABAC, + + /** + * Cabac context_mb_info + */ + ISVCE_MEM_REC_CABAC_MB_INFO, + + /** + * entropy context + */ + ISVCE_MEM_REC_ENTROPY, + + /** + * Buffer to hold coeff data + */ + ISVCE_MEM_REC_MB_COEFF_DATA, + + /** + * Buffer to hold mb header data + */ + ISVCE_MEM_REC_MB_HEADER_DATA, + + /** + * Motion vector bank + */ + ISVCE_MEM_REC_MVBANK, + + /** + * Motion vector bits + */ + ISVCE_MEM_REC_MVBITS, + + /** + * Holds mem records passed to the codec.
+ */ + ISVCE_MEM_REC_BACKUP, + + /** + * Holds SPS + */ + ISVCE_MEM_REC_SPS, + + /** + * Holds PPS + */ + ISVCE_MEM_REC_PPS, + + /** + * Holds SVC NALU Extension data + */ + ISVCE_MEM_REC_SVC_NALU_EXT, + + /** + * Holds subset SPS data + */ + ISVCE_MEM_REC_SUBSET_SPS, + + /** + * Holds Slice Headers + */ + ISVCE_MEM_REC_SLICE_HDR, + + /** + * Holds SVC Slice Headers + */ + ISVCE_MEM_REC_SVC_SLICE_HDR, + + /** + * Contains map indicating slice index per MB basis + */ + ISVCE_MEM_REC_SLICE_MAP, + + /** + * Holds thread handles + */ + ISVCE_MEM_REC_THREAD_HANDLE, + + /** + * Holds control call mutex + */ + ISVCE_MEM_REC_CTL_MUTEX, + + /** + * Holds entropy call mutex + */ + ISVCE_MEM_REC_ENTROPY_MUTEX, + + /** + * Holds memory for Process JOB Queue + */ + ISVCE_MEM_REC_PROC_JOBQ, + + /** + * Holds memory for Entropy JOB Queue + */ + ISVCE_MEM_REC_ENTROPY_JOBQ, + + /** + * Contains status map indicating processing status per MB basis + */ + ISVCE_MEM_REC_PROC_MAP, + + /** + * Contains status map indicating deblocking status per MB basis + */ + ISVCE_MEM_REC_DBLK_MAP, + + /* + * Contains AIR map and mask + */ + ISVCE_MEM_REC_AIR_MAP, + + /** + * Contains status map indicating ME status per MB basis + */ + ISVCE_MEM_REC_ME_MAP, + + /** + * Holds dpb manager context + */ + ISVCE_MEM_REC_DPB_MGR, + + /** + * Holds intermediate buffers needed during processing stage + * Memory for process contexts is allocated in this memtab + */ + ISVCE_MEM_REC_PROC_SCRATCH, + + /** + * Holds buffers for vert_bs, horz_bs and QP (all frame level) + */ + ISVCE_MEM_REC_QUANT_PARAM, + + /** + * Holds top row syntax information + */ + ISVCE_MEM_REC_TOP_ROW_SYN_INFO, + + /** + * Holds buffers for vert_bs, horz_bs and QP (all frame level) + */ + ISVCE_MEM_REC_BS_QP, + + /** + * Holds input buffer manager context + */ + ISVCE_MEM_REC_INP_PIC, + + /** + * Holds output buffer manager context + */ + ISVCE_MEM_REC_OUT, + + /** + * Holds picture buffer manager context and array of pic_buf_ts + * Also 
holds reference picture buffers in non-shared mode + */ + ISVCE_MEM_REC_REF_PIC, + + /* + * Mem record for color space conversion + */ + ISVCE_MEM_REC_CSC, + + /** + * NMB info struct + */ + ISVCE_MEM_REC_MB_INFO_NMB, + + /** + * SVC Spatial layer Inputs + */ + ISVCE_MEM_SVC_SPAT_INP, + + /** + * Downscaler memory records + */ + ISVCE_MEM_DOWN_SCALER, + + /** + * SVC ILP data + */ + ISVCE_MEM_SVC_ILP_DATA, + + /** + * SVC ILP MV Context + */ + ISVCE_MEM_SVC_ILP_MV_CTXT, + + /** + * SVC ResPred Context + */ + ISVCE_MEM_SVC_RES_PRED_CTXT, + + /** + * SVC inter-layer intra pred context + */ + ISVCE_MEM_SVC_INTRA_PRED_CTXT, + + /** + * RC Utils Context + */ + ISVCE_MEM_SVC_RC_UTILS_CTXT, + + /** + * SubPic RC Context + */ + ISVCE_MEM_SVC_SUB_PIC_RC_CTXT, + +#if ENABLE_MODE_STAT_VISUALISER + ISVCE_MEM_MODE_STAT_VISUALISER_BUF, +#endif + + /** + * Rate control of memory records. + */ + ISVCE_MEM_REC_RC, + + /** + * Place holder to compute number of memory records. + */ + ISVCE_MEM_REC_CNT = ISVCE_MEM_REC_RC + NUM_SVCE_RC_MEMTABS, + + /* + * Do not add anything below + */ +} ISVCE_MEMREC_TYPES_T; + +#endif diff --git a/encoder/svc/isvce_downscaler.c b/encoder/svc/isvce_downscaler.c new file mode 100644 index 0000000..8822312 --- /dev/null +++ b/encoder/svc/isvce_downscaler.c @@ -0,0 +1,537 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_downscaler.c +* +* @brief +* Contains downscaler functions required by the SVC encoder +* +* @author +* ittiam +* +* @par List of Functions: +* - isvce_get_downscaler_data_size() +* - isvce_get_downscaler_padding_dims() +* - isvce_get_downscaler_normalized_filtered_pixel() +* - isvce_horizontal_downscale_and_transpose() +* - isvce_process_downscaler() +* - isvce_initialize_downscaler() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* system include files */ +#include +#include + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "isvc_macros.h" +#include "ih264_platform_macros.h" +#include "iv2.h" +#include "isvc_defs.h" +#include "isvce_defs.h" +#include "isvc_structs.h" +#include "isvc_structs.h" +#include "isvce_downscaler.h" +#include "isvce_downscaler_private_defs.h" + +/** +****************************************************************************** +* @brief lanczos filter coefficients for 2x downscaling +* @remarks Though the length of the filter is 8, the +* same coefficients +* are replicated so that 2 rows can be processed at one +* go in SIMD +****************************************************************************** +*/ +static WORD8 gai1_lanczos_coefficients_2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] = { + {-7, 0, 39, 64, 39, 0, -7, 0, -7, 0, 39, 64, 39, 0, -7, 0}, + {-6, 0, 33, 62, 41, 4, -6, 
0, -6, 0, 33, 62, 41, 4, -6, 0}, + {-5, -1, 29, 57, 45, 9, -5, -1, -5, -1, 29, 57, 45, 9, -5, -1}, + {-4, -2, 23, 55, 48, 14, -4, -2, -4, -2, 23, 55, 48, 14, -4, -2}, + {-3, -3, 18, 52, 52, 18, -3, -3, -3, -3, 18, 52, 52, 18, -3, -3}, + {-2, -4, 13, 49, 54, 24, -2, -4, -2, -4, 13, 49, 54, 24, -2, -4}, + {-1, -5, 9, 44, 58, 29, -1, -5, -1, -5, 9, 44, 58, 29, -1, -5}, + {0, -6, 3, 42, 61, 34, 0, -6, 0, -6, 3, 42, 61, 34, 0, -6}}; + +/** +****************************************************************************** +* @brief lanczos filter coefficients for 1.5x downscaling +* @remarks Though the length of the filter is 8, the same coefficients +* are replicated so that 2 rows can be processed at one go in SIMD. +****************************************************************************** +*/ +static WORD8 gai1_lanczos_coefficients_3by2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] = + {{0, -11, 32, 86, 32, -11, 0, 0, 0, -11, 32, 86, 32, -11, 0, 0}, + {0, -10, 26, 79, 39, -5, 0, 0, 0, -10, 26, 79, 39, -5, 0, 0}, + {0, -8, 21, 72, 46, 0, -2, 0, 0, -8, 21, 72, 46, 0, -2, 0}, + {0, -6, 15, 66, 52, 3, -3, 0, 0, -6, 15, 66, 52, 3, -3, 0}, + {0, -6, 10, 60, 60, 10, -6, 0, 0, -6, 10, 60, 60, 10, -6, 0}, + {0, -3, 3, 52, 66, 15, -6, 0, 0, -3, 3, 52, 66, 15, -6, 0}, + {0, -2, 0, 46, 72, 21, -8, 0, 0, -2, 0, 46, 72, 21, -8, 0}, + {0, 0, -5, 39, 79, 26, -10, 0, 0, 0, -5, 39, 79, 26, -10, 0}}; + +/** +******************************************************************************* +* +* @brief +* gets the memory size required for downscaler +* +* @par Description: +* returns the memory required by the downscaler context and state structs +* for allocation. 
+* +* @returns +* +* @remarks +* +* +******************************************************************************* +*/ + +UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor, + UWORD32 u4_width, UWORD32 u4_height) +{ + UWORD32 u4_size = 0; + + if(u1_num_spatial_layers > 1) + { + u4_size += sizeof(downscaler_state_t); + + u4_size += + (u4_height + NUM_SCALER_FILTER_TAPS * 2) * ((UWORD32) (u4_width / d_scaling_factor)); + } + + return u4_size; +} + +/** +******************************************************************************* +* +* @brief +* gets the padding size required for filtering +* +* @par Description: +* fills the padding sizes required for filtering: half the number of filter +* taps on the top and bottom, and the same value rounded with ALIGN8 on the +* left and right +* +* @param[out] ps_pad_dims +* pointer to the padding dims struct that receives the four pad sizes +* +* @returns none +* +* @remarks +* +* +******************************************************************************* +*/ + +void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims) +{ + ps_pad_dims->u1_left_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2); + ps_pad_dims->u1_right_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2); + ps_pad_dims->u1_top_pad_size = NUM_SCALER_FILTER_TAPS / 2; + ps_pad_dims->u1_bottom_pad_size = NUM_SCALER_FILTER_TAPS / 2; +} + +/** +******************************************************************************* +* +* @brief +* processes downscaler +* +* @par Description: +* calls the downscaler function twice for luma and twice for chroma: the +* first call writes into the scratch buffer of the scaler state and the +* second call reads the scratch buffer and writes into the destination +* buffer +* +* @param[in] ps_scaler +* pointer to downscaler context +* +* @param[in] ps_src_buf_props +* pointer to source buffer props struct +* +* @param[out] ps_dst_buf_props +* pointer to destination buffer props struct +* +* @param[in] u4_blk_wd +* width of the block to be processed +* +* @param[in] u4_blk_ht +* height of the block to be processed +* +* @returns none +* +* @remarks +* the source colour format is asserted to be IV_YUV_420SP_UV. The source +* pointers are moved back by the filter support before filtering, so the +* source is read outside the block (presumably the caller has padded it - +* TODO confirm) +* +******************************************************************************* +*/ + +void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler, yuv_buf_props_t *ps_src_buf_props, + yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd, + UWORD32 u4_blk_ht) +{ + buffer_container_t s_src_buf; + buffer_container_t
s_dst_buf; + + UWORD32 u4_scaled_block_size_x, u4_scaled_block_size_y; + + downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state; + + ASSERT(ps_src_buf_props->e_color_format == IV_YUV_420SP_UV); + + u4_scaled_block_size_x = (UWORD32) (u4_blk_wd / ps_scaler->d_scaling_factor); + u4_scaled_block_size_y = (UWORD32) (u4_blk_ht / ps_scaler->d_scaling_factor); + + /* luma: first pass writes the scratch buffer, second pass writes the destination */ + s_src_buf = ps_src_buf_props->as_component_bufs[Y]; + s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - (NUM_SCALER_FILTER_TAPS / 2) - + (NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride; + + s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf; + s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS; + + ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters, + u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 0); + + s_src_buf = s_dst_buf; + s_dst_buf = ps_dst_buf_props->as_component_bufs[Y]; + + ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters, + u4_scaled_block_size_y, u4_scaled_block_size_x, 0); + + /* chroma: block height and scaled height are halved; same two passes as luma */ + u4_blk_ht /= 2; + u4_scaled_block_size_y /= 2; + + s_src_buf = ps_src_buf_props->as_component_bufs[U]; + s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - NUM_SCALER_FILTER_TAPS - + (NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride; + + s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf; + s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS; + + ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters, + u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 1); + + s_src_buf = s_dst_buf; + s_dst_buf = ps_dst_buf_props->as_component_bufs[U]; + + ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters, + u4_scaled_block_size_y, u4_scaled_block_size_x, 0); +} + +/**
+******************************************************************************* +* +* @brief +* normalized dot product computer for downscaler +* +* @par Description: +* Given the downscaler filter coefficients, source buffer, the function +* calculates the dot product between them, adds an offset and normalizes it +* +* @param[in] pu1_src +* pointer to src buf; NUM_SCALER_FILTER_TAPS consecutive pixels are read +* +* @param[in] pi1_filter +* pointer to filter coefficients; NUM_SCALER_FILTER_TAPS entries are read +* +* @returns +* the dot product plus a rounding offset of (1 << (FILTER_COEFF_Q - 1)), +* divided by (1 << FILTER_COEFF_Q) and passed through CLIP_U8 +* +* @remarks +* +******************************************************************************* +*/ + +static UWORD8 isvce_get_downscaler_normalized_filtered_pixel(UWORD8 *pu1_src, WORD8 *pi1_filter) +{ + WORD32 i; + WORD32 i4_norm_dot_product; + UWORD8 u1_out_pixel; + WORD32 i4_dot_product_sum = 0; + WORD32 i4_rounding_offset = 1 << (FILTER_COEFF_Q - 1); + WORD32 i4_normalizing_factor = 1 << FILTER_COEFF_Q; + + for(i = 0; i < NUM_SCALER_FILTER_TAPS; i++) + { + i4_dot_product_sum += (pu1_src[i] * pi1_filter[i]); + } + + i4_norm_dot_product = ((i4_dot_product_sum + i4_rounding_offset) / i4_normalizing_factor); + u1_out_pixel = (UWORD8) CLIP_U8(i4_norm_dot_product); + + return u1_out_pixel; +} + +/** +******************************************************************************* +* +* @brief +* horizontal scaler function +* +* @par Description: +* Does horizontal scaling for the given block +* +* @param[in] ps_scaler +* pointer to downscaler context +* +* @param[in] ps_src +* pointer to source buffer container +* +* @param[in] ps_dst +* pointer to destination buffer container +* +* @param[in] pai1_filters +* pointer to array of downscaler filters +* +* @param[in] u4_blk_wd +* width of the block after horizontal scaling (output block width) +* +* @param[in] u4_blk_ht +* height of the current block (input block height) +* +* @param[in] u1_is_chroma +* flag suggesting whether the buffer is luma or chroma +* +* +* @returns +* +* @remarks +* The same function is used for vertical scaling too as +* the horizontally scaled input is
stored in transpose fashion. +* +******************************************************************************* +*/ + +static void isvce_horizontal_downscale_and_transpose( + downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst, + FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma) +{ + WORD32 i, j, k; + UWORD8 u1_phase; + UWORD8 u1_filtered_out_pixel; + UWORD8 *pu1_src_j, *pu1_dst_j; + UWORD8 u1_filtered_out_u_pixel, u1_filtered_out_v_pixel; + UWORD8 *pu1_in_pixel; + UWORD8 *pu1_out_pixel; + WORD8 *pi1_filter_grid; + UWORD16 u2_full_pixel_inc; + UWORD8 au1_temp_u_buff[NUM_SCALER_FILTER_TAPS]; + UWORD8 au1_temp_v_buff[NUM_SCALER_FILTER_TAPS]; + + downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state; + + UWORD32 u4_center_pixel_pos = ps_scaler_state->i4_init_offset; + UWORD32 u4_src_horz_increments = ps_scaler_state->u4_horz_increment; + UWORD8 *pu1_src = ps_src->pv_data; + UWORD32 u4_in_stride = ps_src->i4_data_stride; + UWORD8 *pu1_dst = ps_dst->pv_data; + UWORD32 u4_out_stride = ps_dst->i4_data_stride; + UWORD32 u4_center_pixel_pos_src = u4_center_pixel_pos; + + /* Offset the input so that the input pixel to be processed + coincides with the centre of the filter (4th coefficient) */ + pu1_src += (1 + u1_is_chroma); + + ASSERT((1 << DOWNSCALER_Q) == ps_scaler_state->u4_vert_increment); + + if(!u1_is_chroma) + { + for(j = 0; j < (WORD32) u4_blk_ht; j++) + { + pu1_src_j = pu1_src + (j * u4_in_stride); + pu1_dst_j = pu1_dst + j; /* source row j is written out as destination column j (transpose) */ + + u4_center_pixel_pos = u4_center_pixel_pos_src; + + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_grid = pai1_filters[u1_phase]; + + /* Doing the Calculation for current Loop Count */ + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); 
+ + u1_filtered_out_pixel = + isvce_get_downscaler_normalized_filtered_pixel(pu1_in_pixel, pi1_filter_grid); + *pu1_out_pixel = u1_filtered_out_pixel; + + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + } + else + { + for(j = 0; j < (WORD32) u4_blk_ht; j++) + { + pu1_src_j = pu1_src + (j * u4_in_stride); + pu1_dst_j = pu1_dst + j; + + u4_center_pixel_pos = u4_center_pixel_pos_src; + + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_grid = pai1_filters[u1_phase]; + + /* Doing the Calculation for current Loop Count */ + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); + + for(k = 0; k < NUM_SCALER_FILTER_TAPS; k++) /* even source offsets go to the U tap buffer, odd offsets to the V tap buffer */ + { + au1_temp_u_buff[k] = *(pu1_in_pixel + (2 * k)); + au1_temp_v_buff[k] = *(pu1_in_pixel + ((2 * k) + 1)); + } + + u1_filtered_out_u_pixel = isvce_get_downscaler_normalized_filtered_pixel( + au1_temp_u_buff, pi1_filter_grid); + u1_filtered_out_v_pixel = isvce_get_downscaler_normalized_filtered_pixel( + au1_temp_v_buff, pi1_filter_grid); + *pu1_out_pixel = u1_filtered_out_u_pixel; + *(pu1_out_pixel + u4_out_stride) = u1_filtered_out_v_pixel; /* V lands one destination row after U */ + + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + } +} + +void isvce_downscaler_function_selector(downscaler_state_t *ps_scaler_state, IV_ARCH_T e_arch) +{ + switch(e_arch) + { +#if defined(X86) + case ARCH_X86_SSE42: + { + ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_sse42; + + break; + } +#elif defined(ARMV8) + case ARCH_ARM_A53: + case ARCH_ARM_A57: + case ARCH_ARM_V8_NEON: + { + ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon; + + break; + } +#elif !defined(DISABLE_NEON) + case ARCH_ARM_A9Q: + case ARCH_ARM_A9A: + case ARCH_ARM_A9: + case 
ARCH_ARM_A7: + case ARCH_ARM_A5: + case ARCH_ARM_A15: + { + ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon; + + break; + } +#endif + default: /* every other arch value, or SIMD case not compiled in: plain C kernel */ + { + ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose; + + break; + } + } +} + +/** +******************************************************************************* +* +* @brief +* initializes the downscaler context +* +* @par Description: +* when more than one spatial layer is used, sets up the downscaler state at +* ps_mem_rec->pv_base (scratch buffer right after it) and fills the context +* +* @param[in] ps_scaler +* pointer to downscaler context +* +* @param[in] ps_mem_rec +* pointer to memory allocated to downscaler process +* +* @param[in] d_scaling_factor +* scaling ratio of width/ height between two consecutive SVC layers +* +* @param[in] u1_num_spatial_layers +* number of spatial layers; initialization is done only when it is above 1 +* +* @param[in] u4_in_width +* width of the input +* +* @param[in] u4_in_height +* height of the input +* +* @param[in] e_arch +* architecture type +* +* @returns +* +* @remarks +* kernel selection lives in isvce_downscaler_function_selector(); nothing +* is written to ps_scaler when u1_num_spatial_layers is 1 or less +******************************************************************************* +*/ + +void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec, + DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers, + UWORD32 u4_in_width, UWORD32 u4_in_height, IV_ARCH_T e_arch) +{ + if(u1_num_spatial_layers > 1) + { + downscaler_state_t *ps_scaler_state; + + UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base; + + ps_scaler_state = (downscaler_state_t *) pu1_buf; + pu1_buf += sizeof(ps_scaler_state[0]); + + ps_scaler_state->pv_scratch_buf = pu1_buf; + ps_scaler_state->u4_in_wd = u4_in_width; + ps_scaler_state->u4_in_ht = u4_in_height; + + ps_scaler->pv_scaler_state = ps_scaler_state; + ps_scaler->d_scaling_factor = d_scaling_factor; + ps_scaler->u1_num_spatial_layers = u1_num_spatial_layers; + + 
isvce_downscaler_function_selector(ps_scaler_state, e_arch); + + ps_scaler_state->u4_horz_increment = (UWORD32) (d_scaling_factor * (1 << DOWNSCALER_Q)); /* source step per output pixel, scaled by 2^DOWNSCALER_Q */ + + ps_scaler_state->u4_vert_increment = (1 << DOWNSCALER_Q); + ps_scaler_state->i4_init_offset = 0; + ps_scaler_state->pai1_filters = (d_scaling_factor == 2.0) ? gai1_lanczos_coefficients_2x + : gai1_lanczos_coefficients_3by2x; /* every factor other than exactly 2.0 gets the 3by2x table */ + } +} diff --git a/encoder/svc/isvce_downscaler.h b/encoder/svc/isvce_downscaler.h new file mode 100644 index 0000000..bd8e4f3 --- /dev/null +++ b/encoder/svc/isvce_downscaler.h @@ -0,0 +1,205 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_downscaler.h +* +* @brief +* Contains downscaler functions required by the SVC encoder +* +* @author +* ittiam +* +* @par List of Functions: +* - isvce_get_downscaler_data_size() +* - isvce_get_downscaler_padding_dims() +* - isvce_isvce_process_ctxt_t_downscaler() +* - isvce_get_downscaler_normalized_filtered_pixel() +* - isvce_horizontal_downscale_and_transpose() +* - isvce_process_downscaler() +* - isvce_initialize_downscaler() +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_DOWNSCALER_H_ +#define _ISVCE_DOWNSCALER_H_ + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "isvc_defs.h" +#include "isvc_structs.h" +#include "isvce_defs.h" + +typedef struct +{ + /** + * pointer to the state of downscaler + */ + void *pv_scaler_state; + + /** + * scaling factor between the dimensions of two consecutive SVC layers + */ + DOUBLE d_scaling_factor; + + /** + * Num spatial layers + */ + UWORD8 u1_num_spatial_layers; + +} downscaler_ctxt_t; + +typedef struct +{ + UWORD8 u1_left_pad_size; + + UWORD8 u1_right_pad_size; + + UWORD8 u1_top_pad_size; + + UWORD8 u1_bottom_pad_size; + +} padding_dims_t; + +/** +******************************************************************************* +* +* @brief +* initializes the downscaler context +* +* @par Description: +* initializes the downscaler context for the given scaling factor +* with padding size, filter size, etc. 
+* +* @param[in] ps_scaler +* pointer to downscaler context +* +* @param[in] ps_mem_rec +* pointer to memory allocated to downscaler process +* +* @param[in] d_scaling_factor +* scaling ratio of width/ height between two consecutive SVC layers +* +* @param[in] u1_num_spatial_layers +* number of spatial layers +* +* @param[in] u4_in_width +* width of the input +* +* @param[in] u4_in_height +* height of the input +* +* @param[in] e_arch +* architecture type +* +* @returns +* +* @remarks +* when ARM intrinsics are added, update should be done here +* +******************************************************************************* +*/ + +extern void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec, + DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers, + UWORD32 u4_in_width, UWORD32 u4_in_height, + IV_ARCH_T e_arch); + +/** +******************************************************************************* +* +* @brief +* gets the memory size required for downscaler +* +* @par Description: +* returns the memory required by the downscaler context and state structs +* for allocation. 
+* +* @returns +* +* @remarks +* +* +******************************************************************************* +*/ + +extern UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor, + UWORD32 u4_width, UWORD32 u4_height); + +/** +******************************************************************************* +* +* @brief +* processes downscaler +* +* @par Description: +* calls the function for padding and scaling +* +* @param[in] ps_scaler +* pointer to downscaler context +* +* @param[in] ps_src_buf_props +* pointer to source buffer props struct +* +* @param[in] u4_blk_wd +* width of the block to be processed +* +* @param[in] u4_blk_ht +* height of the block to be processed +* +* @returns +* +* @remarks +* ps_dst_buf_props (not listed above) points to the destination buffer props struct +* +******************************************************************************* +*/ + +extern void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler, + yuv_buf_props_t *ps_src_buf_props, + yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd, + UWORD32 u4_blk_ht); + +/** +******************************************************************************* +* +* @brief +* gets the padding size required for filtering +* +* @par Description: +* gets the padding size required for filtering +* +* @returns +* +* @remarks +* +* +******************************************************************************* +*/ + +extern void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims); + +#endif diff --git a/encoder/svc/isvce_downscaler_private_defs.h b/encoder/svc/isvce_downscaler_private_defs.h new file mode 100644 index 0000000..87ad374 --- /dev/null +++ b/encoder/svc/isvce_downscaler_private_defs.h @@ -0,0 +1,124 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +#ifndef _ISVCE_DOWNSCALER_PRIVATE_DEFS_H_ +#define _ISVCE_DOWNSCALER_PRIVATE_DEFS_H_ +#include "ih264_typedefs.h" +#include "isvc_macros.h" +#include "ih264_debug.h" +#include "isvc_structs.h" +#include "isvce_downscaler.h" + +/* Macros */ +#define DOWNSCALER_Q 16 /* number of fractional bits in the fixed-point source pixel position */ + +#define FILTER_COEFF_Q 7 /* number of fractional bits in the filter coefficients */ + +#define NUM_SCALER_FILTER_TAPS 8 + +#define NUM_SCALER_FILTER_PHASES 8 + +/* Typedefs */ +typedef WORD8 (*FILTER_COEFF_ARRAY)[NUM_SCALER_FILTER_TAPS * 2]; + +typedef void FT_DOWNSCALER(downscaler_ctxt_t *ps_scaler_state, buffer_container_t *ps_src, + buffer_container_t *ps_dst, FILTER_COEFF_ARRAY pai1_filters, + UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma); + +/* Structs */ +typedef struct +{ + /** + * pointer to scratch buf + */ + void *pv_scratch_buf; + + /** + * initial offset while calculating input pixel location + */ + WORD32 i4_init_offset; + + /** + * increment to the centre pixel in horizontal direction + */ + UWORD32 u4_horz_increment; + + /** + * increment to the centre pixel in vertical direction + */ + UWORD32 u4_vert_increment; + + /** + * pointer to the filter coefficients + */ + FILTER_COEFF_ARRAY pai1_filters; + + /** + * function pointer to the leaf level function for horizontal scaling + */ + FT_DOWNSCALER *pf_downscaler; + + /** + * width of the input (highest SVC layer) + */ + UWORD32 u4_in_wd; + + /** + * height of the input (highest SVC layer) + */ + 
UWORD32 u4_in_ht; + +} downscaler_state_t; + +static FORCEINLINE UWORD32 get_filter_phase(UWORD32 u4_center_pixel_pos) /* returns the index of the phase bin that holds the fractional part of the position */ +{ + UWORD32 au4_phase_binning_pos[NUM_SCALER_FILTER_PHASES + 1]; + UWORD32 i; + + ASSERT(NUM_SCALER_FILTER_PHASES == 8); + + for(i = 0; i < NUM_SCALER_FILTER_PHASES + 1; i++) + { + au4_phase_binning_pos[i] = (i << DOWNSCALER_Q) / NUM_SCALER_FILTER_PHASES; /* edge i of the equal-width bins spanning one full pixel */ + } + + u4_center_pixel_pos = u4_center_pixel_pos % (1 << DOWNSCALER_Q); /* keep only the fractional part */ + + for(i = 0; i < NUM_SCALER_FILTER_PHASES; i++) + { + if((u4_center_pixel_pos < au4_phase_binning_pos[i + 1]) && + (u4_center_pixel_pos >= au4_phase_binning_pos[i])) + { + return i; + } + } + + ASSERT(0); /* not expected to be reached: the bins cover the whole fractional range */ + + return 0; +} + +/* SSE42 Declarations */ +extern FT_DOWNSCALER isvce_horizontal_downscale_and_transpose_sse42; + +/* NEON Declarations */ +extern FT_DOWNSCALER isvce_horizontal_downscale_and_transpose_neon; + +#endif diff --git a/encoder/svc/isvce_encode.c b/encoder/svc/isvce_encode.c new file mode 100644 index 0000000..1dab028 --- /dev/null +++ b/encoder/svc/isvce_encode.c @@ -0,0 +1,790 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file +* isvce_encode.c +* +* @brief +* This file contains functions for encoding the input yuv frame in synchronous +* api mode +* +* @author +* ittiam +* +* List of Functions +* - isvce_join_threads() +* - isvce_wait_for_thread() +* - isvce_encode() +* +****************************************************************************** +*/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ih264_typedefs.h" +/* Dependencies of ih264_buf_mgr.h */ +/* Dependencies of ih264_list.h */ +#include "ih264_error.h" +/* Dependencies of ih264_common_tables.h */ +#include "ih264_defs.h" +#include "ih264_structs.h" +#include "ih264_buf_mgr.h" +#include "ih264_common_tables.h" +#include "ih264_list.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +/* Dependencies of ih264e_cabac_structs.h */ +#include "ih264_cabac_tables.h" +/* Dependencies of ime_structs.h */ +#include "ime_defs.h" +#include "ime_distortion_metrics.h" +/* Dependencies of ih264e_structs.h */ +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +/* Dependencies of ih264e_bitstream.h */ +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ih264e_cabac_structs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ime_statistics.h" +#include "ime_structs.h" +/* Dependencies of 'ih264e_utils.h' */ +#include "ih264e_defs.h" +#include "ih264e_structs.h" +#include "ih264e_utils.h" +#include "ime.h" +#include "isvce.h" +#include "isvce_cabac.h" +#include "isvce_deblk.h" +#include "isvce_defs.h" +#include "isvce_downscaler.h" +#include "isvce_encode_header.h" +#include "isvce_fmt_conv.h" +#include "isvce_ibl_eval.h" 
+#include "isvce_ilp_mv.h" +#include "isvce_intra_modes_eval.h" +#include "isvce_me.h" +#include "isvce_process.h" +#include "isvce_rate_control.h" +#include "isvce_residual_pred.h" +#include "isvce_sub_pic_rc.h" +#include "isvce_utils.h" + +#define SEI_BASED_FORCE_IDR 1 + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief This function puts the current thread to sleep for a duration +* of sleep_us +* +* @par Description +* ithread_yield() method causes the calling thread to yield execution to +*another thread that is ready to run on the current processor. The operating +*system selects the thread to yield to. ithread_sleep blocks the current thread +*for the duration passed in sleep_us. In other words, yield just says, end +*my timeslice prematurely, look around for other threads to run. If there is +*nothing better than me, continue. Sleep says I don't want to run for the +* requested duration. Even if no other thread wants to run, don't make me run. 
+* +* @param[in] sleep_us +* thread sleep duration +* +* @returns IH264E_SUCCESS always +* +****************************************************************************** +*/ +IH264E_ERROR_T isvce_wait_for_thread(UWORD32 sleep_us) +{ + /* yield thread */ + ithread_yield(); + + /* put thread to sleep; NOTE(review): the name sleep_us suggests microseconds - confirm the unit ithread_sleep() expects */ + ithread_sleep(sleep_us); + + return IH264E_SUCCESS; +} + +/** +****************************************************************************** +* +* @brief +* Encodes in synchronous api mode +* +* @par Description +* This routine processes input yuv, encodes it and outputs bitstream and recon +* +* @param[in] ps_codec_obj +* Pointer to codec object at API level +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns Status +* +****************************************************************************** +*/ +WORD32 isvce_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) +{ + /* error status */ + IH264E_ERROR_T error_status = IH264E_SUCCESS; + + /* codec ctxt */ + isvce_codec_t *ps_codec = (isvce_codec_t *) ps_codec_obj->pv_codec_handle; + + /* input frame to encode */ + isvce_video_encode_ip_t *ps_video_encode_ip = pv_api_ip; + + /* output buffer to write stream */ + isvce_video_encode_op_t *ps_video_encode_op = pv_api_op; + + /* i/o structures */ + isvce_inp_buf_t s_inp_buf; + isvce_out_buf_t s_out_buf; + + WORD32 ctxt_sel = 0, i4_rc_pre_enc_skip; + WORD32 i, j; + + ASSERT(MAX_CTXT_SETS == 1); + + /********************************************************************/ + /* BEGIN INIT */ + /********************************************************************/ + /* reset output structure */ + ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS; + ps_video_encode_op->s_ive_op.output_present = 0; + ps_video_encode_op->s_ive_op.dump_recon = 0; + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME; + + /* Check for output memory allocation size */ + { + 
UWORD32 u4_min_bufsize = + MIN_STREAM_SIZE * ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; + UWORD32 u4_bufsize_per_layer = ps_video_encode_ip->s_ive_ip.s_out_buf.u4_bufsize / + ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; + + if(ps_video_encode_ip->s_ive_ip.s_out_buf.u4_bufsize < u4_min_bufsize) + { + error_status = IH264E_INSUFFICIENT_OUTPUT_BUFFER; + + SET_ERROR_ON_RETURN(error_status, IVE_UNSUPPORTEDPARAM, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + } + + for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + s_out_buf.as_bits_buf[i] = ps_video_encode_ip->s_ive_ip.s_out_buf; + + s_out_buf.as_bits_buf[i].u4_bufsize = u4_bufsize_per_layer; + s_out_buf.as_bits_buf[i].pv_buf = + ((UWORD8 *) ps_video_encode_ip->s_ive_ip.s_out_buf.pv_buf) + + u4_bufsize_per_layer * i; + } + } + + s_out_buf.u4_is_last = 0; + s_out_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low; + s_out_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high; + + /* api call cnt */ + ps_codec->i4_encode_api_call_cnt += 1; + + /* codec context selector */ + ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS; + + /* reset status flags */ + ps_codec->ai4_pic_cnt[ctxt_sel] = -1; + ps_codec->s_rate_control.post_encode_skip[ctxt_sel] = 0; + ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = 0; + + /* pass output buffer to codec */ + ps_codec->as_out_buf[ctxt_sel] = s_out_buf; + + /* initialize codec ctxt with default params for the first encode api call */ + if(ps_codec->i4_encode_api_call_cnt == 0) + { + isvce_codec_init(ps_codec); + } + + /* parse configuration params */ + for(i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++) + { + isvce_cfg_params_t *ps_cfg = &ps_codec->as_cfg[i]; + + if(1 == ps_cfg->u4_is_valid) + { + if(((ps_cfg->u4_timestamp_high == ps_video_encode_ip->s_ive_ip.u4_timestamp_high) && + (ps_cfg->u4_timestamp_low == ps_video_encode_ip->s_ive_ip.u4_timestamp_low)) || + ((WORD32) 
ps_cfg->u4_timestamp_high == -1) || + ((WORD32) ps_cfg->u4_timestamp_low == -1)) + { + error_status = isvce_codec_update_config(ps_codec, ps_cfg); + SET_ERROR_ON_RETURN(error_status, IVE_UNSUPPORTEDPARAM, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + + ps_cfg->u4_is_valid = 0; + } + } + } + /* Force IDR based on SEI params */ +#if SEI_BASED_FORCE_IDR + { + sei_mdcv_params_t *ps_sei_mdcv_params = &ps_codec->s_sei.s_sei_mdcv_params; + sei_mdcv_params_t *ps_cfg_sei_mdcv_params = &ps_codec->s_cfg.s_sei.s_sei_mdcv_params; + sei_cll_params_t *ps_sei_cll_params = &ps_codec->s_sei.s_sei_cll_params; + sei_cll_params_t *ps_cfg_sei_cll_params = &ps_codec->s_cfg.s_sei.s_sei_cll_params; + sei_ave_params_t *ps_sei_ave_params = &ps_codec->s_sei.s_sei_ave_params; + sei_ave_params_t *ps_cfg_sei_ave_params = &ps_codec->s_cfg.s_sei.s_sei_ave_params; + + if((ps_sei_mdcv_params->au2_display_primaries_x[0] != + ps_cfg_sei_mdcv_params->au2_display_primaries_x[0]) || + (ps_sei_mdcv_params->au2_display_primaries_x[1] != + ps_cfg_sei_mdcv_params->au2_display_primaries_x[1]) || + (ps_sei_mdcv_params->au2_display_primaries_x[2] != + ps_cfg_sei_mdcv_params->au2_display_primaries_x[2]) || + (ps_sei_mdcv_params->au2_display_primaries_y[0] != + ps_cfg_sei_mdcv_params->au2_display_primaries_y[0]) || + (ps_sei_mdcv_params->au2_display_primaries_y[1] != + ps_cfg_sei_mdcv_params->au2_display_primaries_y[1]) || + (ps_sei_mdcv_params->au2_display_primaries_y[2] != + ps_cfg_sei_mdcv_params->au2_display_primaries_y[2]) || + (ps_sei_mdcv_params->u2_white_point_x != ps_cfg_sei_mdcv_params->u2_white_point_x) || + (ps_sei_mdcv_params->u2_white_point_y != ps_cfg_sei_mdcv_params->u2_white_point_y) || + (ps_sei_mdcv_params->u4_max_display_mastering_luminance != + ps_cfg_sei_mdcv_params->u4_max_display_mastering_luminance) || + (ps_sei_mdcv_params->u4_min_display_mastering_luminance != + ps_cfg_sei_mdcv_params->u4_min_display_mastering_luminance)) + { + ps_codec->s_sei.s_sei_mdcv_params = 
ps_codec->s_cfg.s_sei.s_sei_mdcv_params; + ps_codec->s_sei.u1_sei_mdcv_params_present_flag = 1; + } + else + { + ps_codec->s_sei.u1_sei_mdcv_params_present_flag = 0; + } + + if((ps_sei_cll_params->u2_max_content_light_level != + ps_cfg_sei_cll_params->u2_max_content_light_level) || + (ps_sei_cll_params->u2_max_pic_average_light_level != + ps_cfg_sei_cll_params->u2_max_pic_average_light_level)) + { + ps_codec->s_sei.s_sei_cll_params = ps_codec->s_cfg.s_sei.s_sei_cll_params; + ps_codec->s_sei.u1_sei_cll_params_present_flag = 1; + } + else + { + ps_codec->s_sei.u1_sei_cll_params_present_flag = 0; + } + + if((ps_sei_ave_params->u4_ambient_illuminance != + ps_cfg_sei_ave_params->u4_ambient_illuminance) || + (ps_sei_ave_params->u2_ambient_light_x != ps_cfg_sei_ave_params->u2_ambient_light_x) || + (ps_sei_ave_params->u2_ambient_light_y != ps_cfg_sei_ave_params->u2_ambient_light_y)) + { + ps_codec->s_sei.s_sei_ave_params = ps_codec->s_cfg.s_sei.s_sei_ave_params; + ps_codec->s_sei.u1_sei_ave_params_present_flag = 1; + } + else + { + ps_codec->s_sei.u1_sei_ave_params_present_flag = 0; + } + + if((1 == ps_codec->s_sei.u1_sei_mdcv_params_present_flag) || + (1 == ps_codec->s_sei.u1_sei_cll_params_present_flag) || + (1 == ps_codec->s_sei.u1_sei_ave_params_present_flag)) + { + ps_codec->force_curr_frame_type = IV_IDR_FRAME; + } + } +#endif + + /* In case of alt ref and B pics we will have non reference frame in stream */ + if(ps_codec->s_cfg.u4_enable_alt_ref || ps_codec->s_cfg.u4_num_bframes) + { + ps_codec->i4_non_ref_frames_in_stream = 1; + } + + if(ps_codec->i4_encode_api_call_cnt == 0) + { + /********************************************************************/ + /* number of mv/ref bank buffers used by the codec, */ + /* 1 to handle curr frame */ + /* 1 to store information of ref frame */ + /* 1 more additional because of the codec employs 2 ctxt sets */ + /* to assist asynchronous API */ + /********************************************************************/ + + /* 
initialize mv bank buffer manager */ + error_status = isvce_svc_au_data_mgr_add_bufs(ps_codec); + + SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + + /* initialize ref bank buffer manager */ + error_status = isvce_svc_au_buf_mgr_add_bufs(ps_codec); + + SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + + /* for the first frame, generate header when not requested explicitly */ + if(ps_codec->i4_header_mode == 0 && ps_codec->u4_header_generated == 0) + { + ps_codec->i4_gen_header = 1; + } + } + + /* generate header and return when encoder is operated in header mode */ + if(ps_codec->i4_header_mode == 1) + { + /* whenever the header is generated, this implies a start of sequence + * and a sequence needs to be started with IDR + */ + ps_codec->force_curr_frame_type = IV_IDR_FRAME; + + s_inp_buf.s_svc_params = ps_codec->s_cfg.s_svc_params; + s_inp_buf.s_inp_props.s_raw_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf; + s_inp_buf.s_inp_props.s_raw_buf.au4_wd[Y] = ps_codec->s_cfg.u4_wd; + s_inp_buf.s_inp_props.s_raw_buf.au4_ht[Y] = ps_codec->s_cfg.u4_ht; + + isvce_init_svc_dimension(&s_inp_buf); + + /* generate header */ + error_status = isvce_generate_sps_pps(ps_codec, &s_inp_buf); + + /* send the input to app */ + ps_video_encode_op->s_ive_op.s_inp_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf; + ps_video_encode_op->s_ive_op.u4_timestamp_low = + ps_video_encode_ip->s_ive_ip.u4_timestamp_low; + ps_video_encode_op->s_ive_op.u4_timestamp_high = + ps_video_encode_ip->s_ive_ip.u4_timestamp_high; + + ps_video_encode_op->s_ive_op.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last; + + /* send the output to app */ + ps_video_encode_op->s_ive_op.output_present = 1; + ps_video_encode_op->s_ive_op.dump_recon = 0; + ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].as_bits_buf[0]; + + for(i = 1; i < 
ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + memmove(((UWORD8 *) ps_video_encode_op->s_ive_op.s_out_buf.pv_buf + + ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes), + ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].pv_buf, + ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes); + + ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes += + ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes; + } + + /* error status */ + SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + + /* indicates that header has been generated previously */ + ps_codec->u4_header_generated = 1; + + /* api call cnt */ + ps_codec->i4_encode_api_call_cnt--; + + /* header mode tag is not sticky */ + ps_codec->i4_header_mode = 0; + ps_codec->i4_gen_header = 0; + + return IV_SUCCESS; + } + + /* curr pic cnt */ + ps_codec->i4_pic_cnt += 1; + + i4_rc_pre_enc_skip = 0; + for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + i4_rc_pre_enc_skip = + isvce_input_queue_update(ps_codec, &ps_video_encode_ip->s_ive_ip, &s_inp_buf, i); + } + + s_out_buf.u4_is_last = s_inp_buf.s_inp_props.u4_is_last; + ps_video_encode_op->s_ive_op.u4_is_last = s_inp_buf.s_inp_props.u4_is_last; + + /* Only encode if the current frame is not pre-encode skip */ + if(!i4_rc_pre_enc_skip && s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0]) + { + isvce_process_ctxt_t *ps_proc = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS]; + + WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1; + + ps_codec->ai4_pic_cnt[ctxt_sel] = ps_codec->i4_pic_cnt; + + error_status = isvce_svc_au_init(ps_codec, &s_inp_buf); + + SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + + isvce_nalu_info_au_init(ps_codec->as_nalu_descriptors, + ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers); + +#if ENABLE_MODE_STAT_VISUALISER + isvce_msv_get_input_frame(ps_codec->ps_mode_stat_visualiser, &s_inp_buf); 
+#endif + + for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + isvce_svc_layer_pic_init(ps_codec, &s_inp_buf, i); + + for(j = 0; j < num_thread_cnt; j++) + { + ithread_create(ps_codec->apv_proc_thread_handle[j], NULL, isvce_process_thread, + &ps_codec->as_process[j + 1]); + + ps_codec->ai4_process_thread_created[j] = 1; + + ps_codec->i4_proc_thread_cnt++; + } + + /* launch job */ + isvce_process_thread(ps_proc); + + /* Join threads at the end of encoding a frame */ + isvce_join_threads(ps_codec); + + ih264_list_reset(ps_codec->pv_proc_jobq); + + ih264_list_reset(ps_codec->pv_entropy_jobq); + } + +#if ENABLE_MODE_STAT_VISUALISER + isvce_msv_dump_visualisation(ps_codec->ps_mode_stat_visualiser); +#endif + + isvce_sub_pic_rc_dump_data(ps_codec->as_process->ps_sub_pic_rc_ctxt); + } + + /**************************************************************************** + * RECON + * Since we have forward dependent frames, we cannot return recon in + *encoding order. It must be in poc order, or input pic order. To achieve this + *we introduce a delay of 1 to the recon wrt encode. Now since we have that + * delay, at any point minimum of pic_cnt in our ref buffer will be the + * correct frame. For ex let our GOP be IBBP [1 2 3 4] . The encode order + * will be [1 4 2 3] .Now since we have a delay of 1, when we are done with + * encoding 4, the min in the list will be 1. After encoding 2, it will be + * 2, 3 after 3 and 4 after 4. Hence we can return in sequence. Note + * that the 1 delay is critical. Hence if we have post enc skip, we must + * skip here too. Note that since post enc skip already frees the recon + * buffer we need not do any thing here + * + * We need to return a recon when ever we consume an input buffer. This + * comsumption include a pre or post enc skip. 
Thus dump recon is set for + * all cases except when + * 1) We are waiting -> ps_codec->i4_pic_cnt > + *ps_codec->s_cfg.u4_num_bframe An exception need to be made for the case when + *we have the last buffer since we need to flush out the on remainig recon. + ****************************************************************************/ + + ps_video_encode_op->s_ive_op.dump_recon = 0; + + if(ps_codec->s_cfg.u4_enable_recon && + ((ps_codec->i4_pic_cnt > (WORD32) ps_codec->s_cfg.u4_num_bframes) || + s_inp_buf.s_inp_props.u4_is_last)) + { + /* error status */ + IH264_ERROR_T ret = IH264_SUCCESS; + + svc_au_buf_t *ps_pic_buf = NULL; + + WORD32 i4_buf_status, i4_curr_poc = 32768; + + /* In case of skips we return recon, but indicate that buffer is zero size + */ + if(ps_codec->s_rate_control.post_encode_skip[ctxt_sel] || i4_rc_pre_enc_skip) + { + ps_video_encode_op->s_ive_op.dump_recon = 1; + ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[0] = 0; + ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[1] = 0; + } + else + { + for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + { + if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) continue; + + i4_buf_status = ih264_buf_mgr_get_status( + ps_codec->pv_ref_buf_mgr, ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id); + + if((i4_buf_status & BUF_MGR_IO) && (ps_codec->as_ref_set[i].i4_poc < i4_curr_poc)) + { + ps_pic_buf = ps_codec->as_ref_set[i].ps_pic_buf; + i4_curr_poc = ps_codec->as_ref_set[i].i4_poc; + } + } + + ps_video_encode_op->s_ive_op.s_recon_buf = ps_video_encode_ip->s_ive_ip.s_recon_buf; + + /* + * If we get a valid buffer. output and free recon. + * + * we may get an invalid buffer if num_b_frames is 0. This is because + * We assume that there will be a ref frame in ref list after encoding + * the last frame. With B frames this is correct since its forward ref + * pic will be in the ref list. 
But if num_b_frames is 0, we will not + * have a forward ref pic + */ + + if(ps_pic_buf) + { + if((ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[Y] != + ps_codec->s_cfg.u4_disp_wd) || + (ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_ht[Y] != + ps_codec->s_cfg.u4_disp_ht)) + { + SET_ERROR_ON_RETURN(IH264E_NO_FREE_RECONBUF, IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + } + + isvce_fmt_conv(ps_codec, ps_pic_buf, + ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0], + ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1], + ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2], + ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0], + ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1], 0, + ps_codec->s_cfg.u4_disp_ht); + + ps_video_encode_op->s_ive_op.dump_recon = 1; + + ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_pic_buf->i4_buf_id, + BUF_MGR_IO); + + if(IH264_SUCCESS != ret) + { + SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + } + } + } + } + + /*************************************************************************** + * Free reference buffers: + * In case of a post enc skip, we have to ensure that those pics will not + * be used as reference anymore. 
In all other cases we will not even mark + * the ref buffers + ***************************************************************************/ + if(ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) + { + /* pic info */ + svc_au_buf_t *ps_cur_pic; + + /* mv info */ + svc_au_data_t *ps_cur_mv_buf; + + /* error status */ + IH264_ERROR_T ret = IH264_SUCCESS; + + /* Decrement coded pic count */ + ps_codec->i4_poc--; + + /* loop through to get the min pic cnt among the list of pics stored in ref + * list */ + /* since the skipped frame may not be on reference list, we may not have an + * MV bank hence free only if we have allocated */ + for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + { + if(ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt) + { + ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf; + + ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_svc_au_data; + + /* release this frame from reference list and recon list */ + ret = ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr, + ps_cur_mv_buf->i4_buf_id, BUF_MGR_REF); + ret |= ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr, + ps_cur_mv_buf->i4_buf_id, BUF_MGR_IO); + SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + + ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id, + BUF_MGR_REF); + ret |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id, + BUF_MGR_IO); + SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + break; + } + } + } + + /* + * Since recon is not in sync with output, ie there can be frame to be + * given back as recon even after last output. Hence we need to mark that + * the output is not the last. 
+ * Hence search through reflist and mark appropriately + */ + if(ps_codec->s_cfg.u4_enable_recon) + { + WORD32 i4_buf_status = 0; + + for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + { + if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) continue; + + i4_buf_status |= ih264_buf_mgr_get_status( + ps_codec->pv_ref_buf_mgr, ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id); + } + + if(i4_buf_status & BUF_MGR_IO) + { + s_out_buf.u4_is_last = 0; + ps_video_encode_op->s_ive_op.u4_is_last = 0; + } + } + + /************************************************************************** + * Signaling to APP + * 1) If we valid a valid output mark it so + * 2) Set the codec output ps_video_encode_op + * 3) Set the error status + * 4) Set the return Pic type + * Note that we already has marked recon properly + * 5)Send the consumed input back to app so that it can free it if possible + * + * We will have to return the output and input buffers unconditionally + * so that app can release them + **************************************************************************/ + if(!i4_rc_pre_enc_skip && !ps_codec->s_rate_control.post_encode_skip[ctxt_sel] && + s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0]) + { + /* receive output back from codec */ + s_out_buf = ps_codec->as_out_buf[ctxt_sel]; + + /* send the output to app */ + ps_video_encode_op->s_ive_op.output_present = 1; + ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS; + + /* Set the time stamps of the encodec input */ + ps_video_encode_op->s_ive_op.u4_timestamp_low = s_inp_buf.s_inp_props.u4_timestamp_low; + ps_video_encode_op->s_ive_op.u4_timestamp_high = s_inp_buf.s_inp_props.u4_timestamp_high; + + switch(ps_codec->pic_type) + { + case PIC_IDR: + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_IDR_FRAME; + break; + + case PIC_I: + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_I_FRAME; + break; + + case PIC_P: + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME; + break; + + case PIC_B: + 
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_B_FRAME; + break; + + default: + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME; + break; + } + + for(i = 0; i < (WORD32) ps_codec->s_cfg.u4_num_cores; i++) + { + error_status = ps_codec->as_process[ctxt_sel + i].i4_error_code; + SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + } + } + else + { + /* receive output back from codec */ + s_out_buf = ps_codec->as_out_buf[ctxt_sel]; + + ps_video_encode_op->s_ive_op.output_present = 0; + ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS; + + /* Set the time stamps of the encodec input */ + ps_video_encode_op->s_ive_op.u4_timestamp_low = 0; + ps_video_encode_op->s_ive_op.u4_timestamp_high = 0; + + ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_inp_props.s_raw_buf; + + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME; + } + + /* Send the input to encoder so that it can free it if possible */ + ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].as_bits_buf[0]; + + for(i = 1; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + memmove(((UWORD8 *) ps_video_encode_op->s_ive_op.s_out_buf.pv_buf + + ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes), + ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].pv_buf, + ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes); + + ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes += + ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes; + } + + if(ps_codec->s_cfg.b_nalu_info_export_enable && !i4_rc_pre_enc_skip && + !ps_codec->s_rate_control.post_encode_skip[ctxt_sel] && + s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0]) + { + ps_video_encode_op->b_is_nalu_info_present = true; + + for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + isvce_nalu_info_csv_translator(&ps_codec->as_nalu_descriptors[i], + &ps_video_encode_ip->ps_nalu_info_buf[i]); + + 
ps_video_encode_op->ps_nalu_info_buf[i] = ps_video_encode_ip->ps_nalu_info_buf[i]; + } + } + else + { + ps_video_encode_op->b_is_nalu_info_present = false; + } + + ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_inp_props.s_raw_buf; + + return IV_SUCCESS; +} diff --git a/encoder/svc/isvce_encode.h b/encoder/svc/isvce_encode.h new file mode 100644 index 0000000..45ac760 --- /dev/null +++ b/encoder/svc/isvce_encode.h @@ -0,0 +1,41 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_encode.h +* +* @brief +* Contains functions for encode API +* +******************************************************************************* +*/ + +#ifndef _ISVCE_ENCODE_H_ +#define _ISVCE_ENCODE_H_ + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" + +extern WORD32 isvce_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op); + +#endif diff --git a/encoder/svc/isvce_encode_header.c b/encoder/svc/isvce_encode_header.c new file mode 100644 index 0000000..8d12535 --- /dev/null +++ b/encoder/svc/isvce_encode_header.c @@ -0,0 +1,2127 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_encode_header.c +* +* @brief +* This file contains function definitions related to header encoding. 
+* +* @author +* ittiam +* +* @par List of Functions: +* - isvce_generate_sps() +* - isvce_generate_pps() +* - isvce_generate_slice_header() +* - isvce_populate_sps() +* - isvce_populate_pps() +* - isvce_populate_slice_header() +* +******************************************************************************* +*/ + +#include "ih264_typedefs.h" +#include "ih264_debug.h" + +/* Dependencies of ih264e_bitstream.h */ +#include "ih264e_error.h" + +#include "ih264e_bitstream.h" + +#include "isvce_encode_header.h" +#include "isvce_utils.h" + +static FORCEINLINE IH264E_ERROR_T isvce_generate_nal_unit_header(bitstrm_t *ps_bitstrm, + WORD32 nal_unit_type, + WORD32 nal_ref_idc) +{ + WORD32 return_status = IH264E_SUCCESS; + + if(!((nal_unit_type > 0) && (nal_unit_type < 32))) + { + return IH264E_FAIL; + } + + /* forbidden_zero_bit + nal_ref_idc + nal_unit_type */ + PUT_BITS(ps_bitstrm, ((nal_ref_idc << 5) + nal_unit_type), + (1 + 2 + 5), /*1 forbidden zero bit + 2 nal_ref_idc + 5 nal_unit_type */ + return_status, "nal_unit_header"); + + return return_status; +} + +/** +****************************************************************************** +* +* @brief Generates SPS (Sequence Parameter Set) +* +* @par Description +* This function generates Sequence Parameter Set header as per the spec +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] ps_sps +* pointer to structure containing SPS data +* +* @param[in] ps_vui +* pointer to structure containing VUI data +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps, NAL_UNIT_TYPE_T nal_type) +{ + WORD32 return_status = IH264E_SUCCESS; + WORD32 i; + WORD8 i1_nal_ref_idc = 3; + vui_t *ps_vui = &ps_sps->s_vui_parameters; + + /* Insert Start Code */ + return_status = ih264e_put_nal_start_code_prefix(ps_bitstrm, 1); + if(return_status != IH264E_SUCCESS) + { + 
return return_status; + } + /* Insert Nal Unit Header */ + return_status = isvce_generate_nal_unit_header(ps_bitstrm, nal_type, i1_nal_ref_idc); + if(return_status != IH264E_SUCCESS) + { + return return_status; + } + + /* profile_idc */ + PUT_BITS(ps_bitstrm, ps_sps->u1_profile_idc, 8, return_status, "profile_idc"); + + /* constrained_set_flags */ + PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set0_flag, 1, return_status, + "constrained_set0_flag"); + PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set1_flag, 1, return_status, + "constrained_set1_flag"); + PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set2_flag, 1, return_status, + "constrained_set2_flag"); + PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set3_flag, 1, return_status, + "constrained_set3_flag"); + + /* reserved_zero_four_bits */ + PUT_BITS(ps_bitstrm, 0, 4, return_status, "reserved_zero_four_bits"); + + /* level_idc */ + PUT_BITS(ps_bitstrm, ps_sps->u1_level_idc, 8, return_status, "level_idc"); + + /* seq_parameter_set_id */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_sps_id, return_status, "seq_parameter_set_id"); + + if((ps_sps->u1_profile_idc == IH264_SCALABLE_BASELINE) || + (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH)) + { + /* chroma_format_idc */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_chroma_format_idc, return_status, "chroma_format_idc"); + + if(ps_sps->u1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + { + /* i1_residual_colour_transform_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_residual_colour_transform_flag, 1, return_status, + "i1_residual_colour_transform_flag"); + } + + /* bit_depth_luma_minus8 */ + PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_bit_depth_luma - 8), return_status, + "bit_depth_luma_minus8"); + + /* bit_depth_chroma_minus8 */ + PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_bit_depth_chroma - 8), return_status, + "bit_depth_chroma_minus8"); + + /* qpprime_y_zero_transform_bypass_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_qpprime_y_zero_transform_bypass_flag, 1, return_status, + 
"qpprime_y_zero_transform_bypass_flag"); + + /* seq_scaling_matrix_present_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_seq_scaling_matrix_present_flag, 1, return_status, + "seq_scaling_matrix_present_flag"); + + /* seq_scaling_list */ + if(ps_sps->i1_seq_scaling_matrix_present_flag) + { + /* TODO_LATER: Will be enabled once scaling list support is added */ + } + } + + /* log2_max_frame_num_minus4 */ + PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_log2_max_frame_num - 4), return_status, + "log2_max_frame_num_minus4"); + + /* pic_order_cnt_type */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_pic_order_cnt_type, return_status, "pic_order_cnt_type"); + + if(ps_sps->i1_pic_order_cnt_type == 0) + { + /* log2_max_pic_order_cnt_lsb_minus4 */ + PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_log2_max_pic_order_cnt_lsb - 4), return_status, + "log2_max_pic_order_cnt_lsb_minus4"); + } + else if(ps_sps->i1_pic_order_cnt_type == 1) + { + /* delta_pic_order_always_zero_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_delta_pic_order_always_zero_flag, 1, return_status, + "delta_pic_order_always_zero_flag"); + + /* offset_for_non_ref_pic */ + PUT_BITS_SEV(ps_bitstrm, ps_sps->i4_offset_for_non_ref_pic, return_status, + "offset_for_non_ref_pic"); + + /* offset_for_top_to_bottom_field */ + PUT_BITS_SEV(ps_bitstrm, ps_sps->i4_offset_for_top_to_bottom_field, return_status, + "offset_for_top_to_bottom_field"); + + /* num_ref_frames_in_pic_order_cnt_cycle */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_num_ref_frames_in_pic_order_cnt_cycle, return_status, + "num_ref_frames_in_pic_order_cnt_cycle"); + + /* Offset for ref frame */ + for(i = 0; i < ps_sps->u1_num_ref_frames_in_pic_order_cnt_cycle; i++) + { + /* offset_for_ref_frame */ + PUT_BITS_SEV(ps_bitstrm, ps_sps->ai4_offset_for_ref_frame[i], return_status, + "offset_for_ref_frame"); + } + } + + /* num_ref_frames */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_max_num_ref_frames, return_status, "num_ref_frames"); + + /* gaps_in_frame_num_value_allowed_flag */ + PUT_BITS(ps_bitstrm, 
ps_sps->i1_gaps_in_frame_num_value_allowed_flag, 1, return_status, + "gaps_in_frame_num_value_allowed_flag"); + + /* pic_width_in_mbs_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_width_in_mbs_minus1, return_status, + "pic_width_in_mbs_minus1"); + + /* pic_height_in_map_units_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_height_in_map_units_minus1, return_status, + "pic_height_in_map_units_minus1"); + + /* frame_mbs_only_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_frame_mbs_only_flag, 1, return_status, "frame_mbs_only_flag"); + + if(!ps_sps->i1_frame_mbs_only_flag) + { + /* mb_adaptive_frame_field_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_mb_adaptive_frame_field_flag, 1, return_status, + "mb_adaptive_frame_field_flag"); + } + + /* direct_8x8_inference_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_direct_8x8_inference_flag, 1, return_status, + "direct_8x8_inference_flag"); + + /* frame_cropping_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_frame_cropping_flag, 1, return_status, "frame_cropping_flag"); + + if(ps_sps->i1_frame_cropping_flag) + { + /* frame_crop_left_offset */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_left_offset, return_status, + "frame_crop_left_offset"); + + /* frame_crop_right_offset */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_right_offset, return_status, + "frame_crop_right_offset"); + + /* frame_crop_top_offset */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_top_offset, return_status, + "frame_crop_top_offset"); + + /* frame_crop_bottom_offset */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_bottom_offset, return_status, + "frame_crop_bottom_offset"); + } + + /* vui_parameters_present_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_vui_parameters_present_flag, 1, return_status, + "vui_parameters_present_flag"); + + if(ps_sps->i1_vui_parameters_present_flag) + { + /* Add vui parameters to the bitstream */; + return_status = ih264e_generate_vui(ps_bitstrm, ps_vui); + if(return_status != IH264E_SUCCESS) + { + return return_status; 
+ } + } + + if(nal_type != NAL_SUBSET_SPS) + { + /* rbsp trailing bits */ + return_status = ih264e_put_rbsp_trailing_bits(ps_bitstrm); + } + + return return_status; +} + +/** +****************************************************************************** +* +* @brief Generates PPS (Picture Parameter Set) +* +* @par Description +* Generate Picture Parameter Set as per Section 7.3.2.2 +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] ps_pps +* pointer to structure containing PPS data +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps, sps_t *ps_sps) +{ + WORD32 return_status = IH264E_SUCCESS; + + /* Insert the NAL start code */ + return_status = ih264e_put_nal_start_code_prefix(ps_bitstrm, 1); + if(return_status != IH264E_SUCCESS) + { + return return_status; + } + + /* Insert Nal Unit Header */ + PUT_BITS(ps_bitstrm, NAL_PPS_FIRST_BYTE, 8, return_status, "pps_header"); + + /* pic_parameter_set_id */ + PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_pps_id, return_status, "pic_parameter_set_id"); + + /* seq_parameter_set_id */ + PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_sps_id, return_status, "seq_parameter_set_id"); + + /* Entropy coding : 0-VLC; 1 - CABAC */ + PUT_BITS(ps_bitstrm, ps_pps->u1_entropy_coding_mode_flag, 1, return_status, + "Entropy coding : 0-VLC; 1 - CABAC"); + + /* Pic order present flag */ + PUT_BITS(ps_bitstrm, ps_pps->u1_pic_order_present_flag, 1, return_status, + "Pic order present flag"); + + /* Number of slice groups */ + PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_num_slice_groups - 1, return_status, + "Number of slice groups"); + + if(ps_pps->u1_num_slice_groups > 1) + { + /* TODO_LATER: Currently the number of slice groups minus 1 is 0. 
+ * If this is not the case, we have to add Slice group map type to the bit + * stream*/ + } + + /* num_ref_idx_l0_default_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l0_default_active - 1, return_status, + "num_ref_idx_l0_default_active_minus1"); + + /* num_ref_idx_l1_default_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l1_default_active - 1, return_status, + "num_ref_idx_l1_default_active_minus1"); + + /* weighted_pred_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_pred_flag, 1, return_status, "weighted_pred_flag"); + + /* weighted_bipred_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_bipred_idc, 2, return_status, "weighted_bipred_idc"); + + /* pic_init_qp_minus26 */ + PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qp - 26, return_status, "pic_init_qp_minus26"); + + /* pic_init_qs_minus26 */ + PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qs - 26, return_status, "pic_init_qs_minus26"); + + /* chroma_qp_index_offset */ + PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_chroma_qp_index_offset, return_status, + "chroma_qp_index_offset"); + + /* deblocking_filter_control_present_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_deblocking_filter_control_present_flag, 1, return_status, + "deblocking_filter_control_present_flag"); + + /* constrained_intra_pred_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_constrained_intra_pred_flag, 1, return_status, + "constrained_intra_pred_flag"); + + /*redundant_pic_cnt_present_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_redundant_pic_cnt_present_flag, 1, return_status, + "redundant_pic_cnt_present_flag"); + + if(ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH) + { + /* transform_8x8_mode_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_transform_8x8_mode_flag, 1, return_status, + "transform_8x8_mode_flag"); + + /* pic_scaling_matrix_present_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_pic_scaling_matrix_present_flag, 1, return_status, + "pic_scaling_matrix_present_flag"); + + 
if(ps_pps->i1_pic_scaling_matrix_present_flag) + { + /* TODO_LATER: Will be enabled once scaling list support is added */ + } + + /* Second chroma QP offset */ + PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_second_chroma_qp_index_offset, return_status, + "Second chroma QP offset"); + } + + return_status = ih264e_put_rbsp_trailing_bits(ps_bitstrm); + + return return_status; +} + +/** +****************************************************************************** +* +* @brief Generates Slice Header +* +* @par Description +* Generate Slice Header as per Section 7.3.5.1 +* +* @param[inout] ps_bitstrm +* pointer to bitstream context for generating slice header +* +* @param[in] ps_slice_hdr +* pointer to slice header params +* +* @param[in] ps_pps +* pointer to pps params referred by slice +* +* @param[in] ps_sps +* pointer to sps params referred by slice +* +* @param[out] ps_dup_bit_strm_ent_offset +* Bitstream struct to store bitstream state +* +* @param[out] pu4_first_slice_start_offset +* first slice offset is returned +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_generate_slice_header(bitstrm_t *ps_bitstrm, slice_header_t *ps_slice_hdr, + pps_t *ps_pps, sps_t *ps_sps, UWORD8 u1_idr_flag) +{ + WORD32 return_status = IH264E_SUCCESS; + UWORD8 u1_slice_type; + + /* Insert start code */ + return_status = ih264e_put_nal_start_code_prefix(ps_bitstrm, 1); + if(return_status != IH264E_SUCCESS) + { + return return_status; + } + /* Insert Nal Unit Header */ + return_status = isvce_generate_nal_unit_header(ps_bitstrm, ps_slice_hdr->i1_nal_unit_type, + ps_slice_hdr->i1_nal_unit_idc); + if(return_status != IH264E_SUCCESS) + { + return return_status; + } + /* first_mb_in_slice */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_first_mb_in_slice, return_status, + "first_mb_in_slice"); + + /* slice_type */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_slice_type, return_status, "slice_type"); + + /* 
pic_parameter_set_id */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_pps_id, return_status, "pic_parameter_set_id"); + + /* frame_num */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_frame_num, ps_sps->i1_log2_max_frame_num, return_status, + "frame_num"); + + if(!ps_sps->i1_frame_mbs_only_flag) + { + /* field_pic_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_field_pic_flag, 1, return_status, "field_pic_flag"); + + if(ps_slice_hdr->i1_field_pic_flag) + { + /* bottom_field_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_bottom_field_flag, 1, return_status, + "bottom_field_flag"); + } + } + + if(u1_idr_flag == 1) + { + /* u2_idr_pic_id */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_idr_pic_id, return_status, "idr_pic_id"); + } + + if(ps_sps->i1_pic_order_cnt_type == 0) + { + /* pic_order_cnt_lsb */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_pic_order_cnt_lsb, + ps_sps->i1_log2_max_pic_order_cnt_lsb, return_status, "pic_order_cnt_lsb"); + + if(ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag) + { + /* delta_pic_order_cnt_bottom */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i4_delta_pic_order_cnt_bottom, return_status, + "delta_pic_order_cnt_bottom"); + } + } + + if(ps_sps->i1_pic_order_cnt_type == 1 && !ps_sps->i1_delta_pic_order_always_zero_flag) + { + /* delta_pic_order_cnt[0] */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[0], return_status, + "delta_pic_order_cnt[0]"); + + if(ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag) + { + /* delta_pic_order_cnt[1] */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[1], return_status, + "delta_pic_order_cnt[1]"); + } + } + + if(ps_pps->i1_redundant_pic_cnt_present_flag) + { + /* redundant_pic_cnt */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_redundant_pic_cnt, return_status, + "redundant_pic_cnt"); + } + + u1_slice_type = ps_slice_hdr->u1_slice_type % EPSLICE; + + if(u1_slice_type == BSLICE) + { + /* direct_spatial_mv_pred_flag */ + PUT_BITS(ps_bitstrm, 
ps_slice_hdr->u1_direct_spatial_mv_pred_flag, 1, return_status, + "direct_spatial_mv_pred_flag"); + } + + if(u1_slice_type == PSLICE || u1_slice_type == SPSLICE || u1_slice_type == BSLICE) + { + /* num_ref_idx_active_override_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_num_ref_idx_active_override_flag, 1, return_status, + "num_ref_idx_active_override_flag"); + + if(ps_slice_hdr->u1_num_ref_idx_active_override_flag) + { + /* num_ref_idx_l0_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l0_active - 1, return_status, + "num_ref_idx_l0_active_minus1"); + + if(u1_slice_type == BSLICE) + { + /* num_ref_idx_l1_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l1_active - 1, return_status, + "num_ref_idx_l1_active_minus1"); + } + } + } + + /* ref_pic_list_modification */ + if((u1_slice_type != ISLICE) && (u1_slice_type != SISLICE)) + { + /* ref_pic_list_modification_flag_l0 */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l0, 1, + return_status, "ref_pic_list_modification_flag_l0"); + + if(ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l0) + { + UWORD8 i = 0; + + WORD8 *pi1_modification_of_pic_nums_idc_l0 = + ps_slice_hdr->s_rplm.i1_modification_of_pic_nums_idc_l0; + UWORD32 *pu4_abs_diff_pic_num_minus1_l0 = + ps_slice_hdr->s_rplm.u4_abs_diff_pic_num_minus1_l0; + UWORD8 *pu1_long_term_pic_num_l0 = ps_slice_hdr->s_rplm.u1_long_term_pic_num_l0; + + do + { + /* modification_of_pic_nums_idc */ + PUT_BITS_UEV(ps_bitstrm, pi1_modification_of_pic_nums_idc_l0[i], return_status, + "modification_of_pic_nums_idc"); + + if((0 == pi1_modification_of_pic_nums_idc_l0[i]) || + (1 == pi1_modification_of_pic_nums_idc_l0[i])) + { + /* abs_diff_pic_num_minus1 */ + PUT_BITS_UEV(ps_bitstrm, pu4_abs_diff_pic_num_minus1_l0[0], return_status, + "abs_diff_pic_num_minus1"); + + pu4_abs_diff_pic_num_minus1_l0++; + } + else if(2 == pi1_modification_of_pic_nums_idc_l0[i]) + { + /* long_term_pic_num */ + 
PUT_BITS_UEV(ps_bitstrm, pu1_long_term_pic_num_l0[0], return_status, + "abs_diff_pic_num_minus1"); + + pu1_long_term_pic_num_l0++; + } + } while(pi1_modification_of_pic_nums_idc_l0[i++] != 3); + } + } + + if(u1_slice_type == BSLICE) + { + /* ref_pic_list_modification_flag_l1 */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l1, 1, + return_status, "ref_pic_list_modification_flag_l1"); + + if(ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l1) + { + UWORD8 i = 0; + + WORD8 *pi1_modification_of_pic_nums_idc_l1 = + ps_slice_hdr->s_rplm.i1_modification_of_pic_nums_idc_l1; + UWORD32 *pu4_abs_diff_pic_num_minus1_l1 = + ps_slice_hdr->s_rplm.u4_abs_diff_pic_num_minus1_l1; + UWORD8 *pu1_long_term_pic_num_l1 = ps_slice_hdr->s_rplm.u1_long_term_pic_num_l1; + + do + { + /* modification_of_pic_nums_idc */ + PUT_BITS_UEV(ps_bitstrm, pi1_modification_of_pic_nums_idc_l1[i], return_status, + "modification_of_pic_nums_idc"); + + if((0 == pi1_modification_of_pic_nums_idc_l1[i]) || + (1 == pi1_modification_of_pic_nums_idc_l1[i])) + { + /* abs_diff_pic_num_minus1 */ + PUT_BITS_UEV(ps_bitstrm, pu4_abs_diff_pic_num_minus1_l1[0], return_status, + "abs_diff_pic_num_minus1"); + + pu4_abs_diff_pic_num_minus1_l1++; + } + else if(2 == pi1_modification_of_pic_nums_idc_l1[i]) + { + /* long_term_pic_num */ + PUT_BITS_UEV(ps_bitstrm, pu1_long_term_pic_num_l1[0], return_status, + "abs_diff_pic_num_minus1"); + + pu1_long_term_pic_num_l1++; + } + } while(pi1_modification_of_pic_nums_idc_l1[i++] != 3); + } + } + + if((ps_pps->i1_weighted_pred_flag && u1_slice_type == PSLICE) || + (u1_slice_type == BSLICE && ps_pps->i1_weighted_bipred_idc == 1)) + { + /* TODO_LATER: Currently there is no support for weighted prediction. 
+ This needs to be updated when the support is added */ + } + + if(ps_slice_hdr->i1_nal_unit_idc != 0) + { + if(u1_idr_flag == 1) + { + /* no_output_of_prior_pics_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_no_output_of_prior_pics_flag, 1, return_status, + "no_output_of_prior_pics_flag "); + + /* long_term_reference_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_long_term_reference_flag, 1, return_status, + "long_term_reference_flag "); + } + else + { + /* adaptive_ref_pic_marking_mode_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag, 1, + return_status, "adaptive_ref_pic_marking_mode_flag "); + + if(ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag) + { + /* TODO: if the reference picture marking mode is adaptive + add these fields in the bit-stream */ + } + } + } + + if(ps_slice_hdr->u1_entropy_coding_mode_flag && u1_slice_type != ISLICE && + u1_slice_type != SISLICE) + { + /* cabac_init_idc */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_cabac_init_idc, return_status, "cabac_init_idc"); + } + + /* slice_qp_delta */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_qp - ps_pps->i1_pic_init_qp, return_status, + "slice_qp_delta"); + + if(ps_slice_hdr->u1_slice_type == SPSLICE || ps_slice_hdr->u1_slice_type == SISLICE) + { + if(ps_slice_hdr->u1_slice_type == SPSLICE) + { + /* sp_for_switch_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_sp_for_switch_flag, 1, return_status, + "sp_for_switch_flag"); + } + /* slice_qs_delta */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->u1_slice_qs - ps_pps->i1_pic_init_qs, return_status, + "slice_qs_delta"); + } + + if(ps_pps->i1_deblocking_filter_control_present_flag) + { + /* disable_deblocking_filter_idc */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_disable_deblocking_filter_idc, return_status, + "disable_deblocking_filter_idc"); + + if(ps_slice_hdr->u1_disable_deblocking_filter_idc != 1) + { + /* slice_alpha_c0_offset_div2 */ + PUT_BITS_SEV(ps_bitstrm, 
ps_slice_hdr->i1_slice_alpha_c0_offset_div2, return_status, + "slice_alpha_c0_offset_div2"); + + /* slice_beta_offset_div2 */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_beta_offset_div2, return_status, + "slice_beta_offset_div2"); + } + } + + if(ps_slice_hdr->u1_num_slice_groups_minus1 > 0 && ps_pps->u1_slice_group_map_type >= 3 && + ps_pps->u1_slice_group_map_type <= 5) + { + /* slice_group_change_cycle */ + /* TODO_LATER: Currently the number of slice groups minus 1 is 0. + * If this is not the case, we have to add Slice group map type to the bit + * stream */ + } + + return return_status; +} + +/** +****************************************************************************** +* +* @brief Populates VUI structure +* +* @par Description +* Populates VUI structure for its use in header generation +* +* @param[in] ps_codec +* pointer to encoder context +* +* @return success or failure error code +* +****************************************************************************** +*/ +static IH264E_ERROR_T isvce_populate_vui(isvce_codec_t *ps_codec, sps_t *ps_sps) +{ + vui_t *ps_vui = &ps_sps->s_vui_parameters; + + ps_vui->u1_nal_hrd_parameters_present_flag = 0; + ps_vui->u1_vcl_hrd_parameters_present_flag = 0; + + ps_vui->u1_bitstream_restriction_flag = 1; + ps_vui->u1_motion_vectors_over_pic_boundaries_flag = 1; + ps_vui->u1_max_bytes_per_pic_denom = 0; + ps_vui->u1_max_bits_per_mb_denom = 0; + ps_vui->u1_log2_max_mv_length_horizontal = 16; + ps_vui->u1_log2_max_mv_length_vertical = 16; + + if(ps_codec->s_cfg.u4_num_bframes == 0) + { + ps_vui->u1_num_reorder_frames = 0; + } + else + { + ps_vui->u1_num_reorder_frames = 1; + } + + ps_vui->u1_max_dec_frame_buffering = ps_sps->u1_max_num_ref_frames; + + return 0; +} + +/** +****************************************************************************** +* +* @brief Populates sps structure +* +* @par Description +* Populates sps structure for its use in header generation +* +* @param[in] ps_codec +* pointer to 
encoder context +* +* @param[out] ps_sps +* pointer to sps params that needs to be populated +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T isvce_populate_sps(isvce_codec_t *ps_codec, sps_t *ps_sps, UWORD8 u1_sps_id, + UWORD8 u1_profile_idc, isvce_inp_buf_t *ps_inp_buf, + UWORD8 u1_spatial_layer_id) +{ + /* active config parameters */ + isvce_cfg_params_t *ps_cfg = &(ps_codec->s_cfg); + + // /* level */ + // IH264_LEVEL_T level_idc; + + /* error_status */ + IH264E_ERROR_T i4_err_code = IH264E_FAIL; + + /* profile */ + /* + * Baseline profile supports, 8 bits per sample, 4:2:0 format, CAVLC. + * B frames are not allowed. Further, Flexible mb ordering, Redundant slices, + * Arbitrary slice ordering are supported. The constrained baseline profile is + * baseline profile minus ASO, FMO and redundant slices. To the constrained + * baseline profile if we add support for B slices, support for encoding + * interlaced frames, support for weighted prediction and introduce CABAC + * entropy coding then we have Main Profile. 
+ */ + ps_sps->u1_profile_idc = u1_profile_idc; + + /* level */ + ps_sps->u1_level_idc = MAX( + ps_cfg->u4_max_level, (UWORD32) ih264e_get_min_level(ps_cfg->u4_max_wd, ps_cfg->u4_max_ht)); + + /* constrained flags */ + /* + * baseline profile automatically implies set 0 flag + */ + ps_sps->u1_constraint_set0_flag = (ps_sps->u1_profile_idc == IH264_PROFILE_BASELINE); + /* + * main profile automatically implies set 1 flag + * Although the encoder says it supports Baseline profile it actually supports + * constrained baseline profile as ASO, FMO and redundant slices are not + * supported + */ + ps_sps->u1_constraint_set1_flag = (ps_sps->u1_profile_idc <= IH264_PROFILE_MAIN); + /* + * extended profile is not supported + */ + ps_sps->u1_constraint_set2_flag = 0x00; + /* + * level 1b or level 11 + */ + if(ps_sps->u1_level_idc == IH264_LEVEL_1B) + { + ps_sps->u1_constraint_set3_flag = 0; + ps_sps->u1_level_idc = IH264_LEVEL_11; + } + else + { + ps_sps->u1_constraint_set3_flag = 0; + } + + /* active sps id */ + ps_sps->u1_sps_id = u1_sps_id; + + if((ps_sps->u1_profile_idc == IH264_SCALABLE_BASELINE) || + (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH)) + { + /* chroma format idc */ + ps_sps->u1_chroma_format_idc = CHROMA_FMT_IDC_YUV420; + + /* residual_colour_transform_flag */ + ps_sps->i1_residual_colour_transform_flag = 0; + + /* luma bit depth 8 */ + ps_sps->i1_bit_depth_luma = 8; + + /* chroma bit depth 8 */ + ps_sps->i1_bit_depth_chroma = 8; + + /* qpprime_y_zero_transform_bypass_flag */ + ps_sps->i1_qpprime_y_zero_transform_bypass_flag = 0; + + /* seq_scaling_matrix_present_flag */ + ps_sps->i1_seq_scaling_matrix_present_flag = 0; + + if(ps_sps->i1_seq_scaling_matrix_present_flag) + { + /* TODO_LATER: Will be enabled once scaling list support is added */ + } + } + + /* log2_max_frame_num_minus4 */ + ps_sps->i1_log2_max_frame_num = LOG2_MAX_FRAME_NUM_MINUS4 + 4; + + /* pic_order_cnt_type */ + ps_sps->i1_pic_order_cnt_type = 2; + + 
if(ps_codec->i4_non_ref_frames_in_stream) + { + ps_sps->i1_pic_order_cnt_type = 0; + } + + /* log2_max_pic_order_cnt_lsb_minus4 */ + ps_sps->i1_log2_max_pic_order_cnt_lsb = 8; + + /* TODO : add support for other poc types */ + if(ps_sps->i1_pic_order_cnt_type == 0) + { + } + else if(ps_sps->i1_pic_order_cnt_type == 1) + { + } + + ps_sps->u1_max_num_ref_frames = ps_codec->i4_max_num_reference_frames; + + /* gaps_in_frame_num_value_allowed_flag */ + ps_sps->i1_gaps_in_frame_num_value_allowed_flag = 0; + + /* pic width in mb - 1 */ + ps_sps->i2_pic_width_in_mbs_minus1 = + (ps_inp_buf->as_layer_yuv_buf_props[u1_spatial_layer_id].u4_width >> 4) - 1; + + /* pic height in mb - 1 */ + ps_sps->i2_pic_height_in_map_units_minus1 = + (ps_inp_buf->as_layer_yuv_buf_props[u1_spatial_layer_id].u4_height >> 4) - 1; + + /* frame_mbs_only_flag, no support for interlace encoding */ + ps_sps->i1_frame_mbs_only_flag = 1; + + /* mb_adaptive_frame_field_flag */ + if(ps_sps->i1_frame_mbs_only_flag == 0) + { + ps_sps->i1_mb_adaptive_frame_field_flag = 0; + } + + /* direct_8x8_inference_flag */ + if(ps_sps->u1_level_idc < IH264_LEVEL_30) + { + ps_sps->i1_direct_8x8_inference_flag = 0; + } + else + { + ps_sps->i1_direct_8x8_inference_flag = 1; + } + + /* cropping params */ + /*NOTE : Cropping values depend on the chroma format + * For our case ,decoder interprets the cropping values as 2*num pixels + * Hence the difference in the disp width and width must be halved before + * sending to get the expected results + */ + ps_sps->i1_frame_cropping_flag = 0; + ps_sps->i2_frame_crop_left_offset = 0; + ps_sps->i2_frame_crop_right_offset = (ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd) >> 1; + ps_sps->i2_frame_crop_top_offset = 0; + ps_sps->i2_frame_crop_bottom_offset = (ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht) >> 1; + + if(ps_sps->i2_frame_crop_left_offset || ps_sps->i2_frame_crop_right_offset || + ps_sps->i2_frame_crop_top_offset || ps_sps->i2_frame_crop_bottom_offset) + { + 
ps_sps->i1_frame_cropping_flag = 1; + } + + /* vui params */ + ps_sps->i1_vui_parameters_present_flag = !(ps_cfg->u4_disable_vui); + + if(!ps_sps->i1_vui_parameters_present_flag) + { + /* populate vui params */ + isvce_populate_vui(ps_codec, ps_sps); + } + else + { + ps_sps->s_vui_parameters = ps_cfg->s_vui; + } + + return i4_err_code; +} + +/** +****************************************************************************** +* +* @brief Populates pps structure +* +* @par Description +* Populates pps structure for its use in header generation +* +* @param[in] ps_codec +* pointer to encoder context +* +* @param[out] ps_pps +* pointer to pps params that needs to be populated +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T isvce_populate_pps(isvce_codec_t *ps_codec, pps_t *ps_pps, UWORD8 u1_sps_id, + UWORD8 u1_pps_id, UWORD8 u1_spatial_layer_id) +{ + /* seq_parameter_set_id */ + ps_pps->u1_sps_id = u1_sps_id; + + /* pic_parameter_set_id */ + ps_pps->u1_pps_id = u1_pps_id; + + /* entropy_coding_mode */ + ps_pps->u1_entropy_coding_mode_flag = + ((ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers > 1) && (0 == u1_spatial_layer_id)) + ? CAVLC + : ps_codec->s_cfg.u4_entropy_coding_mode; + + /* pic_order_present_flag is unset if we don't have feilds */ + ps_pps->u1_pic_order_present_flag = 0; + + /* Currently number of slice groups supported are 1 */ + ps_pps->u1_num_slice_groups = 1; + + if(ps_pps->u1_num_slice_groups - 1) + { + /* TODO_LATER: Currently the number of slice groups minus 1 is 0. 
+ * If this is not the case, we have to add Slice group map type to the bit + * stream*/ + } + + /* number of reference frames for list 0 */ + /* FIXME : fix this hard coded value */ + ps_pps->i1_num_ref_idx_l0_default_active = 1; + + /* number of reference frames for list 1 */ + ps_pps->i1_num_ref_idx_l1_default_active = 1; + + /* weighted prediction for now is disabled */ + ps_pps->i1_weighted_pred_flag = 0; + ps_pps->i1_weighted_bipred_idc = 0; + + /* The intent is to not signal qp from pps. Rather send the same in slice + * headers */ + ps_pps->i1_pic_init_qp = 0; + + /* The intent is to not signal qp from pps. Rather send the same in slice + * headers */ + ps_pps->i1_pic_init_qs = 0; + + /* The intent is to not signal qp from pps. Rather send the same in slice + * headers */ + ps_pps->i1_chroma_qp_index_offset = 0; + + /* deblocking filter flags present in slice header */ + ps_pps->i1_deblocking_filter_control_present_flag = 1; + + /* constrained intra prediction */ + ps_pps->i1_constrained_intra_pred_flag = + ps_codec->au4_constrained_intra_pred[u1_spatial_layer_id]; + + /* sending redundant slices is not supported for now */ + ps_pps->i1_redundant_pic_cnt_present_flag = 0; + + ps_pps->u1_slice_group_map_type = 0; + + return IH264E_SUCCESS; +} + +/** +****************************************************************************** +* +* @brief Populates slice header structure +* +* @par Description +* Populates slice header structure for its use in header generation +* +* @param[in] ps_proc +* pointer to proc context +* +* @param[out] ps_slice_hdr +* pointer to slice header structure that needs to be populated +* +* @param[in] ps_pps +* pointer to pps params structure referred by the slice +* +* @param[in] ps_sps +* pointer to sps params referred by the pps +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_populate_slice_header(isvce_process_ctxt_t *ps_proc, 
slice_header_t *ps_slice_hdr, + pps_t *ps_pps, sps_t *ps_sps, UWORD8 u1_is_idr) +{ + /* entropy context */ + isvce_entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + if(ps_codec->u4_is_curr_frm_ref) + { + ps_slice_hdr->i1_nal_unit_idc = 3; + } + else + { + ps_slice_hdr->i1_nal_unit_idc = 0; + } + + /* start mb address */ + ps_slice_hdr->u2_first_mb_in_slice = ps_entropy->i4_mb_start_add; + + /* slice type */ + ps_slice_hdr->u1_slice_type = ps_proc->i4_slice_type; + + /* pic_parameter_set_id */ + ps_slice_hdr->u1_pps_id = ps_pps->u1_pps_id; + + /* Separate color plane flag is 0, + * hence the syntax element color_plane_id not included */ + + /* frame num */ + ps_slice_hdr->i4_frame_num = ps_proc->i4_frame_num; + + /* frame_mbs_only_flag, no support for interlace encoding */ + if(!ps_sps->i1_frame_mbs_only_flag) + { + ps_slice_hdr->i1_field_pic_flag = 0; + + if(ps_slice_hdr->i1_field_pic_flag) + { + ps_slice_hdr->i1_bottom_field_flag = 0; + } + } + + /* idr pic id */ + if(u1_is_idr) + { + ps_slice_hdr->u2_idr_pic_id = ps_proc->u4_idr_pic_id; + ps_slice_hdr->i1_nal_unit_type = NAL_SLICE_IDR; + } + else + { + ps_slice_hdr->i1_nal_unit_type = NAL_SLICE_NON_IDR; + } + + if(ps_proc->u1_spatial_layer_id > 0) + { + ps_slice_hdr->i1_nal_unit_type = NAL_CODED_SLICE_EXTENSION; + } + + if(ps_sps->i1_pic_order_cnt_type == 0) + { + WORD32 i4_poc; + i4_poc = ps_codec->i4_poc; + i4_poc %= (1 << ps_sps->i1_log2_max_pic_order_cnt_lsb); + ps_slice_hdr->i4_pic_order_cnt_lsb = i4_poc; + } + /* TODO add support for poc type 1 */ + else if(ps_sps->i1_pic_order_cnt_type == 1) + { + } + + /* + * redundant slices are not currently supported. 
+ * Hence the syntax element redundant slice cnt is not initialized + */ + if(ps_pps->i1_redundant_pic_cnt_present_flag) + { + } + + /* direct spatial mv pred flag */ + if(ps_proc->i4_slice_type == BSLICE) + { + ps_slice_hdr->u1_direct_spatial_mv_pred_flag = 1; + } + + if(ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE || + ps_proc->i4_slice_type == BSLICE) + { + /* num_ref_idx_active_override_flag */ + ps_slice_hdr->u1_num_ref_idx_active_override_flag = 0; + + if(ps_slice_hdr->u1_num_ref_idx_active_override_flag) + { + /* num_ref_idx_l0_active_minus1 */ + + if(ps_proc->i4_slice_type == BSLICE) + { + /* num_ref_idx_l1_active_minus1 */ + } + } + } + + /* ref_pic_list_modification */ + if((ps_proc->i4_slice_type != ISLICE) && (ps_proc->i4_slice_type != SISLICE)) + { + /* ref_pic_list_modification_flag_l0 */ + ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l0 = 1; + + if(ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l0) + { + ps_slice_hdr->s_rplm.i1_modification_of_pic_nums_idc_l0[0] = 0; + ps_slice_hdr->s_rplm.i1_modification_of_pic_nums_idc_l0[1] = 3; + + if((ps_codec->i4_frame_num - ps_proc->aps_ref_pic[L0]->i4_frame_num) < 1) + { + return IH264E_FAIL; + } + + ps_slice_hdr->s_rplm.u4_abs_diff_pic_num_minus1_l0[0] = + ps_codec->i4_frame_num - ps_proc->aps_ref_pic[L0]->i4_frame_num - 1; + } + } + + if(ps_proc->i4_slice_type == BSLICE) + { + /* ref_pic_list_modification_flag_l1 */ + ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l1 = 0; + + if(ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l1) + { + } + } + + /* Currently we do not support weighted pred */ + /* ps_slice_hdr->u1_weighted_bipred_idc = 0; */ + + if((ps_pps->i1_weighted_pred_flag && + (ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE)) || + (ps_proc->i4_slice_type == BSLICE && ps_pps->i1_weighted_bipred_idc == 1)) + { + /* TODO_LATER: Currently there is no support for weighted prediction. 
+ This needs to be updated when the support is added */ + } + + if(ps_slice_hdr->i1_nal_unit_idc != 0) + { + if(u1_is_idr) + { + /* no_output_of_prior_pics_flag */ + ps_slice_hdr->u1_no_output_of_prior_pics_flag = 0; + + /* long_term_reference_flag */ + ps_slice_hdr->u1_long_term_reference_flag = 0; + } + else + { + /* adaptive_ref_pic_marking_mode_flag */ + ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag = 0; + + if(ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag) + { + /* TODO: if the reference picture marking mode is adaptive + add these fields in the bit-stream */ + } + } + } + + /* entropy coding mode flag */ + ps_slice_hdr->u1_entropy_coding_mode_flag = ps_entropy->u1_entropy_coding_mode_flag; + + if(ps_slice_hdr->u1_entropy_coding_mode_flag && ps_proc->i4_slice_type != ISLICE && + ps_proc->i4_slice_type != SISLICE) + { + /* cabac_init_idc */ + ps_slice_hdr->i1_cabac_init_idc = CABAC_INIT_IDC; + } + + /* slice qp */ + ps_slice_hdr->i1_slice_qp = ps_proc->u1_frame_qp; + + if(ps_proc->i4_slice_type == SPSLICE || ps_proc->i4_slice_type == SISLICE) + { + if(ps_proc->i4_slice_type == SPSLICE) + { + /* sp_for_switch_flag */ + } + /* slice_qs_delta */ + } + + if(ps_pps->i1_deblocking_filter_control_present_flag) + { + /* disable_deblocking_filter_idc */ + ps_slice_hdr->u1_disable_deblocking_filter_idc = ps_proc->u4_disable_deblock_level; + + if(ps_slice_hdr->u1_disable_deblocking_filter_idc != 1) + { + /* slice_alpha_c0_offset_div2 */ + ps_slice_hdr->i1_slice_alpha_c0_offset_div2 = 0; + + /* slice_beta_offset_div2 */ + ps_slice_hdr->i1_slice_beta_offset_div2 = 0; + } + } + ps_slice_hdr->u1_num_slice_groups_minus1 = 0; + if(ps_slice_hdr->u1_num_slice_groups_minus1 > 0 && ps_pps->u1_slice_group_map_type >= 3 && + ps_pps->u1_slice_group_map_type <= 5) + { + /* slice_group_change_cycle */ + /* TODO_LATER: Currently the number of slice groups minus 1 is 0. 
+ * If this is not the case, we have to add Slice group map type to the bit + * stream */ + } + + ps_slice_hdr->i1_cabac_init_idc = CABAC_INIT_IDC; + + return IH264E_SUCCESS; +} + +/** +****************************************************************************** +* +* @brief Populates svc_nalu_ext structure +* +* @par Description +* Populates svc_nalu_ext structure for its use in header generation +* +* @param[in] ps_proc +* pointer to proc context +* +* @param[out] ps_slice_hdr +* pointer to slice header structure that needs to be populated +* +* @param[in] ps_pps +* pointer to pps params structure referred by the slice +* +* @param[in] ps_sps +* pointer to sps params referred by the pps +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_populate_svc_nalu_extension(isvce_process_ctxt_t *ps_proc, + svc_nalu_ext_t *ps_svc_nalu_ext, NAL_UNIT_TYPE_T nalu_type, + UWORD8 u1_idr_flag) +{ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + ps_svc_nalu_ext->u1_idr_flag = u1_idr_flag; + + ps_svc_nalu_ext->u1_priority_id = 0; + + ps_svc_nalu_ext->u1_no_inter_layer_pred_flag = ((nalu_type == NAL_PREFIX) ? 1 : 0); + + ps_svc_nalu_ext->u1_dependency_id = + ((nalu_type == NAL_PREFIX) ? 0 : ps_proc->u1_spatial_layer_id); + + ps_svc_nalu_ext->u1_temporal_id = ps_proc->ps_cur_pic->i1_temporal_id; + + ps_svc_nalu_ext->u1_quality_id = 0; + + ps_svc_nalu_ext->u1_use_ref_base_pic_flag = 0; + + ps_svc_nalu_ext->u1_discardable_flag = 0; + + ps_svc_nalu_ext->u1_output_flag = 1; + + ps_svc_nalu_ext->u1_reserved_three_2bits = 3; + + ps_svc_nalu_ext->s_nalu_header.u1_nal_ref_idc = ps_codec->u4_is_curr_frm_ref ? 
3 : 0; + + ps_svc_nalu_ext->s_nalu_header.u1_nal_unit_type = nalu_type; + + return IH264E_SUCCESS; +} + +WORD32 isvce_populate_subset_sps(isvce_codec_t *ps_codec, subset_sps_t *ps_subset_sps, + UWORD8 u1_sps_id, isvce_inp_buf_t *ps_inp_buf, + UWORD8 u1_spatial_layer_id) +{ + sps_t *ps_sps = &ps_subset_sps->s_sps; + + isvce_populate_sps(ps_codec, ps_sps, u1_sps_id, IH264_SCALABLE_BASELINE, ps_inp_buf, + u1_spatial_layer_id); + + ps_subset_sps->s_sps_svc_ext.u1_inter_layer_deblocking_filter_control_present_flag = 1; + + ps_subset_sps->s_sps_svc_ext.i1_slice_header_restriction_flag = 1; + + ps_subset_sps->s_sps_svc_ext.u1_extended_spatial_scalability_idc = 0; + + ps_subset_sps->s_sps_svc_ext.i1_seq_tcoeff_level_prediction_flag = 0; + + ps_subset_sps->i1_svc_vui_parameters_present_flag = 0; + + ps_subset_sps->i1_additional_extension2_flag = 0; + + ps_subset_sps->s_sps_svc_ext.u1_chroma_phase_x_plus1 = 1; + + ps_subset_sps->s_sps_svc_ext.u1_chroma_phase_y_plus1 = 1; + + ps_subset_sps->s_sps_svc_ext.i1_adaptive_tcoeff_level_prediction_flag = 0; + + return IH264E_SUCCESS; +} + +WORD32 isvce_populate_svc_slice(isvce_process_ctxt_t *ps_proc, svc_slice_header_t *ps_svc_slice_hdr, + pps_t *ps_pps, subset_sps_t *ps_subset_sps, + svc_nalu_ext_t *ps_svc_nalu_ext) +{ + WORD32 i4_return_status; + + i4_return_status = + isvce_populate_slice_header(ps_proc, &ps_svc_slice_hdr->s_slice_header, ps_pps, + &ps_subset_sps->s_sps, ps_svc_nalu_ext->u1_idr_flag); + + if(IH264E_SUCCESS != i4_return_status) + { + return IH264E_FAIL; + } + + ps_svc_slice_hdr->i1_slice_skip_flag = 0; + ps_svc_slice_hdr->i1_adaptive_residual_prediction_flag = ENABLE_RESIDUAL_PREDICTION; + ps_svc_slice_hdr->i1_default_residual_prediction_flag = 0; + ps_svc_slice_hdr->i1_adaptive_base_mode_flag = ENABLE_ILP_MV || ENABLE_IBL_MODE; + ps_svc_slice_hdr->i1_default_base_mode_flag = 0; + ps_svc_slice_hdr->i1_tcoeff_level_prediction_flag = 0; + ps_svc_slice_hdr->i1_constrained_intra_resampling_flag = 0; + 
ps_svc_slice_hdr->i1_adaptive_motion_prediction_flag = USE_ILP_MV_AS_MVP; + ps_svc_slice_hdr->i1_default_motion_prediction_flag = 0; + ps_svc_slice_hdr->u4_disable_inter_layer_deblocking_filter_idc = + !ENABLE_INTRA_BASE_DEBLOCK || ps_proc->u4_disable_deblock_level; + + if(ps_svc_slice_hdr->u4_disable_inter_layer_deblocking_filter_idc != 1) + { + /* slice_alpha_c0_offset_div2 */ + ps_svc_slice_hdr->i4_inter_layer_slice_alpha_c0_offset_div2 = 0; + + /* slice_beta_offset_div2 */ + ps_svc_slice_hdr->i4_inter_layer_slice_beta_offset_div2 = 0; + } + + if((ps_svc_nalu_ext->u1_quality_id == 0) && (ps_svc_nalu_ext->u1_no_inter_layer_pred_flag == 0)) + { + ps_svc_slice_hdr->u4_ref_layer_dq_id = (ps_proc->u1_spatial_layer_id - 1) << 4; + } + + return IH264E_SUCCESS; +} + +/** +****************************************************************************** +* +* @brief Signals prefix_nal_unit_rbsp +* +* @par Description +* prefix_nal_unit_rbsp as per Section G.7.3.2.12 +* +* @param[inout] ps_bitstrm +* pointer to bitstream context for generating slice header +* +* @param[in] svc_nalu_ext +* pointer to svc NAL unit structure +* +* @param[in] ps_slice_header +* pointer to slice header +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_generate_prefix_nal(bitstrm_t *ps_bitstrm, svc_nalu_ext_t *ps_svc_nalu_ext, + slice_header_t *ps_slice_header, UWORD8 u1_max_num_ref_frames, + UWORD8 u1_num_spatial_layers) +{ + WORD32 return_status = IH264E_SUCCESS; + + WORD32 i4_store_ref_base_pic_flag = 0; + WORD32 i4_additional_prefix_nal_unit_extension_flag = 0; + + if(ps_svc_nalu_ext->u1_dependency_id == (u1_num_spatial_layers - 1)) + { + i4_store_ref_base_pic_flag = 1; + if(u1_max_num_ref_frames < 2) + { + i4_store_ref_base_pic_flag = 0; + } + } + + /* store_ref_base_pic_flag */ + if(ps_svc_nalu_ext->s_nalu_header.u1_nal_ref_idc != 0) + { + PUT_BITS(ps_bitstrm, i4_store_ref_base_pic_flag, 1, 
return_status, + "store_ref_base_pic_flag"); + + if((ps_svc_nalu_ext->u1_use_ref_base_pic_flag || i4_store_ref_base_pic_flag) && + !ps_svc_nalu_ext->u1_idr_flag) + { + PUT_BITS(ps_bitstrm, ps_slice_header->u1_adaptive_ref_pic_marking_mode_flag, 1, + return_status, "DPRM: adaptive_ref_base_pic_marking_mode_flag"); + } + + PUT_BITS(ps_bitstrm, i4_additional_prefix_nal_unit_extension_flag, 1, return_status, + "additional_prefix_nal_unit_extension_flag"); + } + + /* rbsp trailing bits */ + return_status = ih264e_put_rbsp_trailing_bits(ps_bitstrm); + + return return_status; +} + +WORD32 isvce_generate_slice_header_svc(bitstrm_t *ps_bitstrm, pps_t *ps_pps, + svc_nalu_ext_t *ps_svc_nalu_ext, + svc_slice_header_t *ps_svc_slice_hdr, + subset_sps_t *ps_subset_sps) +{ + WORD32 return_status = IH264E_SUCCESS; + + UWORD8 u1_slice_type; + sps_t *ps_sps = &ps_subset_sps->s_sps; + slice_header_t *ps_slice_hdr = &ps_svc_slice_hdr->s_slice_header; + + /* first_mb_in_slice */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_first_mb_in_slice, return_status, + "SH: first_mb_in_slice"); + + /* slice_type */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_slice_type, return_status, "SH: slice_type"); + + /* pic_parameter_set_id */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_pps_id, return_status, "SH: pic_parameter_set_id"); + + /* frame_num */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_frame_num, ps_sps->i1_log2_max_frame_num, return_status, + "SH: frame_num"); + + if(!ps_sps->i1_frame_mbs_only_flag) + { + /* field_pic_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_field_pic_flag, 1, return_status, + "SH: field_pic_flag"); + + if(ps_slice_hdr->i1_field_pic_flag) + { + /* bottom_field_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_bottom_field_flag, 1, return_status, + "SH: bottom_field_flag"); + } + } + + if(ps_svc_nalu_ext->u1_idr_flag == 1) + { + /* u2_idr_pic_id */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_idr_pic_id, return_status, "SH: idr_pic_id"); + } + + 
if(ps_sps->i1_pic_order_cnt_type == 0) + { + /* pic_order_cnt_lsb */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_pic_order_cnt_lsb, + ps_sps->i1_log2_max_pic_order_cnt_lsb, return_status, "SH: pic_order_cnt_lsb"); + + if(ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag) + { + /* delta_pic_order_cnt_bottom */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i4_delta_pic_order_cnt_bottom, return_status, + "SH: delta_pic_order_cnt_bottom"); + } + } + + if(ps_sps->i1_pic_order_cnt_type == 1 && !ps_sps->i1_delta_pic_order_always_zero_flag) + { + /* delta_pic_order_cnt[0] */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[0], return_status, + "SH: delta_pic_order_cnt[0]"); + + if(ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag) + { + /* delta_pic_order_cnt[1] */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[1], return_status, + "SH: delta_pic_order_cnt[1]"); + } + } + + if(ps_pps->i1_redundant_pic_cnt_present_flag) + { + /* redundant_pic_cnt */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_redundant_pic_cnt, return_status, + "SH: redundant_pic_cnt"); + } + + u1_slice_type = ps_slice_hdr->u1_slice_type % EPSLICE; + + if(ps_svc_nalu_ext->u1_quality_id == 0) + { + if(u1_slice_type == BSLICE) + { + /* direct_spatial_mv_pred_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_direct_spatial_mv_pred_flag, 1, return_status, + "SH: direct_spatial_mv_pred_flag"); + } + + if(u1_slice_type == PSLICE || u1_slice_type == BSLICE) + { + /* num_ref_idx_active_override_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_num_ref_idx_active_override_flag, 1, + return_status, "SH: num_ref_idx_active_override_flag"); + + if(ps_slice_hdr->u1_num_ref_idx_active_override_flag) + { + /* num_ref_idx_l0_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l0_active - 1, return_status, + "SH: num_ref_idx_l0_active_minus1"); + + if(u1_slice_type == BSLICE) + { + /* num_ref_idx_l1_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, 
ps_slice_hdr->i1_num_ref_idx_l1_active - 1, + return_status, "SH: num_ref_idx_l1_active_minus1"); + } + } + } + + /* ref_pic_list_modification */ + if((u1_slice_type != ISLICE) && (u1_slice_type != SISLICE)) + { + /* ref_pic_list_modification_flag_l0 */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l0, 1, + return_status, "RPLR: ref_pic_list_reordering_flag"); + + if(ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l0) + { + UWORD8 i = 0; + + WORD8 *pi1_modification_of_pic_nums_idc_l0 = + ps_slice_hdr->s_rplm.i1_modification_of_pic_nums_idc_l0; + UWORD32 *pu4_abs_diff_pic_num_minus1_l0 = + ps_slice_hdr->s_rplm.u4_abs_diff_pic_num_minus1_l0; + UWORD8 *pu1_long_term_pic_num_l0 = ps_slice_hdr->s_rplm.u1_long_term_pic_num_l0; + + do + { + /* modification_of_pic_nums_idc */ + PUT_BITS_UEV(ps_bitstrm, pi1_modification_of_pic_nums_idc_l0[i], return_status, + "RPLR: reordering_of_pic_nums_idc"); + + if((0 == pi1_modification_of_pic_nums_idc_l0[i]) || + (1 == pi1_modification_of_pic_nums_idc_l0[i])) + { + /* abs_diff_pic_num_minus1 */ + PUT_BITS_UEV(ps_bitstrm, pu4_abs_diff_pic_num_minus1_l0[0], return_status, + "RPLR: abs_diff_pic_num_minus1"); + + pu4_abs_diff_pic_num_minus1_l0++; + } + else if(2 == pi1_modification_of_pic_nums_idc_l0[i]) + { + /* long_term_pic_num */ + PUT_BITS_UEV(ps_bitstrm, pu1_long_term_pic_num_l0[0], return_status, + "RPLR: long_term_pic_num"); + + pu1_long_term_pic_num_l0++; + } + } while(pi1_modification_of_pic_nums_idc_l0[i++] != 3); + } + } + + if(u1_slice_type == BSLICE) + { + /* ref_pic_list_modification_flag_l1 */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l1, 1, + return_status, "SH: ref_pic_list_modification_flag_l1"); + + if(ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l1) + { + UWORD8 i = 0; + + WORD8 *pi1_modification_of_pic_nums_idc_l1 = + ps_slice_hdr->s_rplm.i1_modification_of_pic_nums_idc_l1; + UWORD32 *pu4_abs_diff_pic_num_minus1_l1 = + 
ps_slice_hdr->s_rplm.u4_abs_diff_pic_num_minus1_l1; + UWORD8 *pu1_long_term_pic_num_l1 = ps_slice_hdr->s_rplm.u1_long_term_pic_num_l1; + + do + { + /* modification_of_pic_nums_idc */ + PUT_BITS_UEV(ps_bitstrm, pi1_modification_of_pic_nums_idc_l1[i], return_status, + "SH: modification_of_pic_nums_idc"); + + if((0 == pi1_modification_of_pic_nums_idc_l1[i]) || + (1 == pi1_modification_of_pic_nums_idc_l1[i])) + { + /* abs_diff_pic_num_minus1 */ + PUT_BITS_UEV(ps_bitstrm, pu4_abs_diff_pic_num_minus1_l1[0], return_status, + "SH: abs_diff_pic_num_minus1"); + + pu4_abs_diff_pic_num_minus1_l1++; + } + else if(2 == pi1_modification_of_pic_nums_idc_l1[i]) + { + /* long_term_pic_num */ + PUT_BITS_UEV(ps_bitstrm, pu1_long_term_pic_num_l1[0], return_status, + "SH: abs_diff_pic_num_minus1"); + + pu1_long_term_pic_num_l1++; + } + } while(pi1_modification_of_pic_nums_idc_l1[i++] != 3); + } + } + + if((ps_pps->i1_weighted_pred_flag && u1_slice_type == PSLICE) || + (u1_slice_type == BSLICE && ps_pps->i1_weighted_bipred_idc == 1)) + { + /* TODO_LATER: Currently there is no support for weighted prediction. 
+ This needs to be updated when the support is added */ + } + + if(ps_slice_hdr->i1_nal_unit_idc != 0) + { + if(ps_svc_nalu_ext->u1_idr_flag) + { + /* no_output_of_prior_pics_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_no_output_of_prior_pics_flag, 1, + return_status, "DRPM: no_output_of_prior_pics_flag "); + + /* long_term_reference_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_long_term_reference_flag, 1, return_status, + "DRPM: long_term_reference_flag "); + } + else + { + /* adaptive_ref_pic_marking_mode_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag, 1, + return_status, "DPRM: adaptive_ref_pic_marking_mode_flag "); + + if(ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag) + { + /* TODO: if the reference picture marking mode is adaptive + add these fields in the bit-stream */ + } + } + if(ps_subset_sps->s_sps_svc_ext.i1_slice_header_restriction_flag == 0) + { + WORD32 i4_store_ref_base_pic_flag = 0; + + if(ps_sps->u1_max_num_ref_frames >= 2) + { + i4_store_ref_base_pic_flag = 1; + } + + /* store_ref_base_pic_flag */ + PUT_BITS(ps_bitstrm, i4_store_ref_base_pic_flag, 1, return_status, + "SH: store_ref_base_pic_flag"); + + if((ps_svc_nalu_ext->u1_use_ref_base_pic_flag || i4_store_ref_base_pic_flag) && + (!ps_svc_nalu_ext->u1_idr_flag)) + { + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag, 1, + return_status, "SH: adaptive_ref_base_pic_marking_mode_flag"); + } + } + } + } + + if(ps_slice_hdr->u1_entropy_coding_mode_flag && u1_slice_type != ISLICE) + { + /* cabac_init_idc */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_cabac_init_idc, return_status, + "SH: cabac_init_idc"); + } + + /* slice_qp_delta */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_qp - ps_pps->i1_pic_init_qp, return_status, + "SH: slice_qp_delta"); + + if(ps_pps->i1_deblocking_filter_control_present_flag) + { + /* disable_deblocking_filter_idc */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_disable_deblocking_filter_idc, 
return_status, + "SH: disable_deblocking_filter_idc"); + + if(ps_slice_hdr->u1_disable_deblocking_filter_idc != 1) + { + /* slice_alpha_c0_offset_div2 */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_alpha_c0_offset_div2, return_status, + "SH: slice_alpha_c0_offset_div2"); + + /* slice_beta_offset_div2 */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_beta_offset_div2, return_status, + "SH: slice_beta_offset_div2"); + } + } + + if(ps_slice_hdr->u1_num_slice_groups_minus1 > 0 && ps_pps->u1_slice_group_map_type >= 3 && + ps_pps->u1_slice_group_map_type <= 5) + { + /* slice_group_change_cycle */ + /* TODO_LATER: Currently the number of slice groups minus 1 is 0. + * If this is not the case, we have to add Slice group map type to the bit + * stream */ + } + + if((ps_svc_nalu_ext->u1_no_inter_layer_pred_flag == 0) && (ps_svc_nalu_ext->u1_quality_id == 0)) + { + PUT_BITS_UEV(ps_bitstrm, ps_svc_slice_hdr->u4_ref_layer_dq_id, return_status, + "SH: ref_layer_dq_id"); + if(ps_subset_sps->s_sps_svc_ext.u1_inter_layer_deblocking_filter_control_present_flag) + { + PUT_BITS_UEV(ps_bitstrm, ps_svc_slice_hdr->u4_disable_inter_layer_deblocking_filter_idc, + return_status, "SH: disable_inter_layer_deblocking_filter_idc"); + if(ps_svc_slice_hdr->u4_disable_inter_layer_deblocking_filter_idc != 1) + { + PUT_BITS_SEV(ps_bitstrm, + ps_svc_slice_hdr->i4_inter_layer_slice_alpha_c0_offset_div2, + return_status, "SH: inter_layer_slice_alpha_c0_offset_div2"); + PUT_BITS_SEV(ps_bitstrm, ps_svc_slice_hdr->i4_inter_layer_slice_beta_offset_div2, + return_status, "SH: inter_layer_slice_beta_offset_div2"); + } + } + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->i1_constrained_intra_resampling_flag, 1, + return_status, "SH: constrained_intra_resampling_flag"); + if(ps_subset_sps->s_sps_svc_ext.u1_extended_spatial_scalability_idc == 2) + { + if(ps_sps->u1_chroma_format_idc > 0) + { + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->i1_ref_layer_chroma_phase_x_plus1_flag, 1, + return_status, "SH: 
ref_layer_chroma_phase_x_plus1_flag"); + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->i1_ref_layer_chroma_phase_y_plus1, 2, + return_status, "SH: ref_layer_chroma_phase_y_plus1"); + } + PUT_BITS_SEV(ps_bitstrm, ps_svc_slice_hdr->i4_scaled_ref_layer_left, return_status, + "SH: scaled_ref_layer_left_offset"); + PUT_BITS_SEV(ps_bitstrm, ps_svc_slice_hdr->i4_scaled_ref_layer_top, return_status, + "SH: scaled_ref_layer_top_offset"); + PUT_BITS_SEV(ps_bitstrm, ps_svc_slice_hdr->i4_scaled_ref_layer_right, return_status, + "SH: scaled_ref_layer_right_offset"); + PUT_BITS_SEV(ps_bitstrm, ps_svc_slice_hdr->i4_scaled_ref_layer_bottom, return_status, + "SH: scaled_ref_layer_bottom_offset"); + } + } + + if(!ps_svc_nalu_ext->u1_no_inter_layer_pred_flag) + { + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->i1_slice_skip_flag, 1, return_status, + "SH: slice_skip_flag"); + if(ps_svc_slice_hdr->i1_slice_skip_flag) + { + PUT_BITS_UEV(ps_bitstrm, ps_svc_slice_hdr->u4_num_mbs_in_slice_minus1, return_status, + "SH: num_mbs_in_slice_minus1"); + } + else + { + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->i1_adaptive_base_mode_flag, 1, return_status, + "SH: adaptive_base_mode_flag"); + if(!ps_svc_slice_hdr->i1_adaptive_base_mode_flag) + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->i1_default_base_mode_flag, 1, return_status, + "SH: default_base_mode_flag"); + + if(!ps_svc_slice_hdr->i1_default_base_mode_flag) + { + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->i1_adaptive_motion_prediction_flag, 1, + return_status, "SH: adaptive_motion_prediction_flag"); + if(!ps_svc_slice_hdr->i1_adaptive_motion_prediction_flag) + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->i1_default_motion_prediction_flag, 1, + return_status, "SH: default_motion_prediction_flag"); + } + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->i1_adaptive_residual_prediction_flag, 1, + return_status, "SH: adaptive_residual_prediction_flag"); + if(!ps_svc_slice_hdr->i1_adaptive_residual_prediction_flag) + PUT_BITS(ps_bitstrm, 
ps_svc_slice_hdr->i1_default_residual_prediction_flag, 1, + return_status, "SH: default_residual_prediction_flag"); + } + + if(ps_subset_sps->s_sps_svc_ext.i1_adaptive_tcoeff_level_prediction_flag) + { + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->i1_tcoeff_level_prediction_flag, 1, + return_status, "SH: tcoeff_level_prediction_flag"); + } + } + + if(!ps_subset_sps->s_sps_svc_ext.i1_slice_header_restriction_flag && + !ps_svc_slice_hdr->i1_slice_skip_flag) + { + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->u4_scan_idx_start, 4, return_status, + "SH: scan_idx_start"); + PUT_BITS(ps_bitstrm, ps_svc_slice_hdr->u4_scan_idx_end, 4, return_status, + "SH: scan_idx_end"); + } + + return return_status; +} + +WORD32 isvce_seq_parameter_set_svc_extension(bitstrm_t *ps_bitstrm, subset_sps_t *ps_sub_sps, + UWORD8 u1_chroma_format_idc) +{ + WORD32 return_status = IH264E_SUCCESS; + + /* inter_layer_deblocking_filter_control_present_flag */ + PUT_BITS(ps_bitstrm, + ps_sub_sps->s_sps_svc_ext.u1_inter_layer_deblocking_filter_control_present_flag, 1, + return_status, "SPS: inter_layer_deblocking_filter_control_present_flag"); + + /* extended_spatial_scalability */ + PUT_BITS(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.u1_extended_spatial_scalability_idc, 2, + return_status, "SPS: extended_spatial_scalability"); + + if(u1_chroma_format_idc == 1 || u1_chroma_format_idc == 2) + { + /* chroma_phase_x_plus1_flag */ + PUT_BITS(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.u1_chroma_phase_x_plus1, 1, return_status, + "SPS: chroma_phase_x_plus1_flag"); + } + + if(u1_chroma_format_idc == 1) + { + /* chroma_phase_y_plus1 */ + PUT_BITS(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.u1_chroma_phase_y_plus1, 2, return_status, + "SPS: chroma_phase_y_plus1"); + } + + if(ps_sub_sps->s_sps_svc_ext.u1_extended_spatial_scalability_idc == 1) + { + if(u1_chroma_format_idc > 0) + { + /* seq_ref_layer_chroma_phase_x_plus1_flag */ + PUT_BITS(ps_bitstrm, + ps_sub_sps->s_sps_svc_ext.u1_seq_ref_layer_chroma_phase_x_plus1_flag, 1, + 
return_status, "SPS: seq_ref_layer_chroma_phase_x_plus1_flag"); + + /* seq_ref_layer_chroma_phase_y_plus1 */ + PUT_BITS(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.u1_seq_ref_layer_chroma_phase_y_plus1, 2, + return_status, "SPS: seq_ref_layer_chroma_phase_y_plus1"); + } + /* seq_scaled_ref_layer_left_offset */ + PUT_BITS_SEV(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_left_offset, + return_status, "SPS: seq_scaled_ref_layer_left_offset"); + + /* seq_scaled_ref_layer_top_offset */ + PUT_BITS_SEV(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_top_offset, + return_status, "SPS: seq_scaled_ref_layer_top_offset"); + + /* seq_scaled_ref_layer_right_offset */ + PUT_BITS_SEV(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_right_offset, + return_status, "SPS: seq_scaled_ref_layer_right_offset"); + + /* seq_scaled_ref_layer_bottom_offset */ + PUT_BITS_SEV(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.i4_seq_scaled_ref_layer_bottom_offset, + return_status, "SPS: seq_scaled_ref_layer_bottom_offset"); + } + + /* seq_tcoeff_level_prediction_flag */ + PUT_BITS(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.i1_seq_tcoeff_level_prediction_flag, 1, + return_status, "SPS: seq_tcoeff_level_prediction_flag"); + + if(ps_sub_sps->s_sps_svc_ext.i1_seq_tcoeff_level_prediction_flag) + { + /* adaptive_tcoeff_level_prediction_flag */ + PUT_BITS(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.i1_adaptive_tcoeff_level_prediction_flag, 1, + return_status, "SPS: adaptive_tcoeff_level_prediction_flag"); + } + + /* slice_header_restriction_flag */ + PUT_BITS(ps_bitstrm, ps_sub_sps->s_sps_svc_ext.i1_slice_header_restriction_flag, 1, + return_status, "SPS: slice_header_restriction_flag"); + + return return_status; +} + +WORD32 isvce_svc_vui_parameters_extension(bitstrm_t *ps_bitstrm, svc_vui_ext_t *ps_svc_vui) +{ + WORD32 return_status = IH264E_SUCCESS; + UWORD32 i; + + PUT_BITS_UEV(ps_bitstrm, ps_svc_vui->u4_vui_ext_num_entries_minus1, return_status, + "num_layers_minus1"); + + for(i = 
0; i < ps_svc_vui->u4_vui_ext_num_entries_minus1; i++) + { + /* dependency_id */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u1_vui_ext_dependency_id[i], 3, return_status, + "dependency_id"); + + /* quality_id */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u1_vui_ext_quality_id[i], 4, return_status, "quality_id"); + + /* temporal_id */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u1_vui_ext_temporal_id[i], 3, return_status, + "temporal_id"); + + /* timing_info_present_flag */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u1_vui_ext_timing_info_present_flag[i], 1, return_status, + "timing_info_present_flag"); + + if(ps_svc_vui->u1_vui_ext_timing_info_present_flag[i]) + { + /* num_units_in_tick */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u4_vui_ext_num_units_in_tick[i], 32, return_status, + "num_units_in_tick"); + + /* time_scale */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u4_vui_ext_time_scale[i], 32, return_status, + "time_scale"); + + /* fixed_frame_rate_flag */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u1_vui_ext_fixed_frame_rate_flag[i], 1, return_status, + "fixed_frame_rate_flag"); + } + + /* nal_hrd_parameters_present_flag */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u1_vui_ext_nal_hrd_params_present_flag[i], 1, + return_status, "nal_hrd_parameters_present_flag"); + + if(ps_svc_vui->u1_vui_ext_nal_hrd_params_present_flag[i]) + { + } + + /* nal_hrd_parameters_present_flag */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u1_vui_ext_vcl_hrd_params_present_flag[i], 1, + return_status, "vcl_hrd_parameters_present_flag"); + + if(ps_svc_vui->u1_vui_ext_vcl_hrd_params_present_flag[i]) + { + } + + if(ps_svc_vui->u1_vui_ext_nal_hrd_params_present_flag[i] || + ps_svc_vui->u1_vui_ext_vcl_hrd_params_present_flag[i]) + { + /* low_delay_hrd_flag */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u1_vui_ext_low_delay_hrd_flag[i], 1, return_status, + "low_delay_hrd_flag"); + } + + /* pic_struct_present_flag */ + PUT_BITS(ps_bitstrm, ps_svc_vui->u1_vui_ext_pic_struct_present_flag[i], 1, return_status, + "pic_struct_present_flag"); + } + + return return_status; +} + 
+WORD32 isvce_generate_subset_sps(bitstrm_t *ps_bitstrm, subset_sps_t *ps_subset_sps) +{ + WORD32 return_status = IH264E_SUCCESS; + sps_t *ps_sps = &ps_subset_sps->s_sps; + return_status = isvce_generate_sps(ps_bitstrm, &ps_subset_sps->s_sps, NAL_SUBSET_SPS); + + /* generate subset sps */ + if(ps_sps->u1_profile_idc == IH264_SCALABLE_BASELINE || + ps_sps->u1_profile_idc == IH264_SCALABLE_HIGH_PROFILE) + { + isvce_seq_parameter_set_svc_extension(ps_bitstrm, ps_subset_sps, + ps_sps->u1_chroma_format_idc); + + /* svc_vui_parameters_present_flag */ + PUT_BITS(ps_bitstrm, ps_subset_sps->i1_svc_vui_parameters_present_flag, 1, return_status, + "SPS: svc_vui_parameters_present_flag"); + + if(ps_subset_sps->i1_svc_vui_parameters_present_flag == 1) + { + svc_vui_ext_t *ps_svc_vui = NULL; + isvce_svc_vui_parameters_extension(ps_bitstrm, ps_svc_vui); + } + + /* additional_extension2_flag */ + PUT_BITS(ps_bitstrm, ps_subset_sps->i1_additional_extension2_flag, 1, return_status, + "SPS: additional_extension2_flag"); + } + + /* rbsp trailing bits */ + return_status = ih264e_put_rbsp_trailing_bits(ps_bitstrm); + + return return_status; +} +/** +****************************************************************************** +* +* @brief Generates svc_nalu_ext +* +* @par Description +* Generate svc_nalu_ext as per Section G.7.3.1.1 +* +* @param[inout] ps_bitstrm +* pointer to bitstream context for generating slice header +* +* @param[in] ps_svc_nalu_ext +* pointer to svc_nalu_ext struct +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_generate_svc_nalu_extension(bitstrm_t *ps_bitstrm, svc_nalu_ext_t *ps_svc_nalu_ext, + UWORD8 u1_nalu_id) +{ + WORD32 return_status = IH264E_SUCCESS; + + /* Insert start code */ + return_status = ih264e_put_nal_start_code_prefix(ps_bitstrm, 1); + + if(return_status != IH264E_SUCCESS) + { + return return_status; + } + + /* Insert Nal Unit Header */ + 
return_status = isvce_generate_nal_unit_header(ps_bitstrm, u1_nalu_id, 3); + + if(return_status != IH264E_SUCCESS) + { + return return_status; + } + + /* reserved_one_bit */ + PUT_BITS(ps_bitstrm, 1, 1, return_status, "NAL unit header: reserved_one_bit"); + + /* idr_flag */ + PUT_BITS(ps_bitstrm, ps_svc_nalu_ext->u1_idr_flag, 1, return_status, + "NAL unit header: idr_flag"); + + /* priority_id */ + PUT_BITS(ps_bitstrm, ps_svc_nalu_ext->u1_priority_id, 6, return_status, + "NAL unit header: priority_id"); + + /* no_inter_layer_pred_flag */ + PUT_BITS(ps_bitstrm, ps_svc_nalu_ext->u1_no_inter_layer_pred_flag, 1, return_status, + "NAL unit header: no_inter_layer_pred_flag"); + + /* dependency_id */ + PUT_BITS(ps_bitstrm, ps_svc_nalu_ext->u1_dependency_id, 3, return_status, + "NAL unit header: dependency_id"); + + /* quality_id */ + PUT_BITS(ps_bitstrm, ps_svc_nalu_ext->u1_quality_id, 4, return_status, + "NAL unit header: quality_id"); + + /* temporal_id */ + PUT_BITS(ps_bitstrm, ps_svc_nalu_ext->u1_temporal_id, 3, return_status, + "NAL unit header: temporal_id"); + + /* use_ref_base_pic_flag */ + PUT_BITS(ps_bitstrm, ps_svc_nalu_ext->u1_use_ref_base_pic_flag, 1, return_status, + "NAL unit header: use_ref_base_pic_flag"); + + /* discardable_flag */ + PUT_BITS(ps_bitstrm, ps_svc_nalu_ext->u1_discardable_flag, 1, return_status, + "NAL unit header: discardable_flag"); + + /* output_flag */ + PUT_BITS(ps_bitstrm, ps_svc_nalu_ext->u1_output_flag, 1, return_status, + "NAL unit header: output_flag"); + + /* reserved_three_2bits */ + PUT_BITS(ps_bitstrm, ps_svc_nalu_ext->u1_reserved_three_2bits, 2, return_status, + "NAL unit header: reserved_three_2bits"); + + return return_status; +} diff --git a/encoder/svc/isvce_encode_header.h b/encoder/svc/isvce_encode_header.h new file mode 100644 index 0000000..c443ed0 --- /dev/null +++ b/encoder/svc/isvce_encode_header.h @@ -0,0 +1,296 @@ +/****************************************************************************** + * + * Copyright 
(C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file +* isvce_encode_header.h +* +* @brief +* This file contains structures and interface prototypes for h264 bitstream +* header encoding +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_ENCODE_HEADER_H_ +#define _ISVCE_ENCODE_HEADER_H_ + +#include "ih264_typedefs.h" + +/* Dependencies of ih264e_bitstream.h */ +#include "ih264e_error.h" + +#include "ih264e_bitstream.h" +#include "ih264e_trace.h" +#include "isvce_structs.h" + +/** +****************************************************************************** +* @brief Macro to put a code with specified number of bits into the +* bitstream +****************************************************************************** +*/ +#define PUT_BITS(ps_bitstrm, code_val, code_len, ret_val, syntax_string) \ + { \ + ENTROPY_TRACE(syntax_string, code_val); \ + ret_val = ih264e_put_bits((ps_bitstrm), (code_val), (code_len)); \ + if(ret_val != IH264E_SUCCESS) \ + { \ + return ret_val; \ + } \ + } + +/** 
+****************************************************************************** +* @brief Macro to put a code with specified number of bits into the +* bitstream using 0th order exponential Golomb encoding for +* unsigned numbers +****************************************************************************** +*/ +#define PUT_BITS_UEV(ps_bitstrm, code_val, ret_val, syntax_string) \ + { \ + ENTROPY_TRACE(syntax_string, code_val); \ + ret_val = ih264e_put_uev((ps_bitstrm), (code_val)); \ + if(ret_val != IH264E_SUCCESS) \ + { \ + return ret_val; \ + } \ + } +/** +****************************************************************************** +* @brief Macro to put a code with specified number of bits into the +* bitstream using 0th order exponential Golomb encoding for +* signed numbers +****************************************************************************** +*/ +#define PUT_BITS_SEV(ps_bitstrm, code_val, ret_val, syntax_string) \ + { \ + ENTROPY_TRACE(syntax_string, code_val); \ + ret_val = ih264e_put_sev((ps_bitstrm), (code_val)); \ + if(ret_val != IH264E_SUCCESS) \ + { \ + return ret_val; \ + } \ + } + +/** +****************************************************************************** +* @brief Macro to set active entropy threads to zero and return +* in case of errors +****************************************************************************** +*/ +#define RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel) \ + if(ps_entropy->i4_error_code != IH264E_SUCCESS) \ + { \ + DATA_SYNC(); \ + ps_codec->au4_entropy_thread_active[ctxt_sel] = 0; \ + return ps_entropy->i4_error_code; \ + } + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ +extern WORD32 ih264e_generate_nal_unit_header(bitstrm_t *ps_bitstrm, WORD32 nal_unit_type, + WORD32 nal_ref_idc); + +extern WORD32 ih264e_generate_vui(bitstrm_t *ps_bitstrm, vui_t 
*ps_vui); + +extern IH264E_ERROR_T ih264e_generate_sei(bitstrm_t *ps_bitstrm, sei_params_t *ps_sei, + UWORD32 u4_insert_per_idr); + +extern IH264E_ERROR_T ih264e_add_filler_nal_unit(bitstrm_t *ps_bitstrm, WORD32 insert_fill_bytes); + +/** +****************************************************************************** +* +* @brief Generates SPS (Sequence Parameter Set) +* +* @par Description +* This function generates Sequence Parameter Set header as per the spec +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] ps_sps +* pointer to structure containing SPS data +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps, NAL_UNIT_TYPE_T nal_type); + +/** +****************************************************************************** +* +* @brief Generates PPS (Picture Parameter Set) +* +* @par Description +* Generate Picture Parameter Set as per Section 7.3.2.2 +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] ps_pps +* pointer to structure containing PPS data +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps, sps_t *ps_sps); + +/** +****************************************************************************** +* +* @brief Generates Slice Header +* +* @par Description +* Generate Slice Header as per Section 7.3.5.1 +* +* @param[inout] ps_bitstrm +* pointer to bitstream context for generating slice header +* +* @param[in] ps_slice_hdr +* pointer to slice header params +* +* @param[in] ps_pps +* pointer to pps params referred by slice +* +* @param[in] ps_sps +* pointer to sps params referred by slice +* +* @param[out] ps_dup_bit_strm_ent_offset +* Bitstream struct to store bitstream state +* +* @param[out] 
pu4_first_slice_start_offset +* first slice offset is returned +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_generate_slice_header(bitstrm_t *ps_bitstrm, slice_header_t *ps_slice_hdr, + pps_t *ps_pps, sps_t *ps_sps, UWORD8 u1_idr_flag); +/** +****************************************************************************** +* +* @brief Populates sps structure +* +* @par Description +* Populates sps structure for its use in header generation +* +* @param[in] ps_codec +* pointer to encoder context +* +* @param[out] ps_sps +* pointer to sps params that needs to be populated +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T isvce_populate_sps(isvce_codec_t *ps_codec, sps_t *ps_sps, UWORD8 u1_sps_id, + UWORD8 u1_profile_idc, isvce_inp_buf_t *ps_inp_buf, + UWORD8 u1_spatial_layer_id); + +/** +****************************************************************************** +* +* @brief Populates pps structure +* +* @par Description +* Populates pps structure for its use in header generation +* +* @param[in] ps_codec +* pointer to encoder context +* +* @param[out] ps_pps +* pointer to pps params that needs to be populated +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T isvce_populate_pps(isvce_codec_t *ps_codec, pps_t *ps_pps, UWORD8 u1_sps_id, + UWORD8 u1_pps_id, UWORD8 u1_spatial_layer_id); + +/** +****************************************************************************** +* +* @brief Populates slice header structure +* +* @par Description +* Populates slice header structure for its use in header generation +* +* @param[in] ps_proc +* pointer to proc context +* +* @param[out] ps_slice_hdr +* pointer to slice header structure that needs to be populated +* +* @param[in] ps_pps +* 
pointer to pps params structure referred by the slice +* +* @param[in] ps_sps +* pointer to sps params referred by the pps +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 isvce_populate_slice_header(isvce_process_ctxt_t *ps_proc, slice_header_t *ps_slice_hdr, + pps_t *ps_pps, sps_t *ps_sps, UWORD8 u1_is_idr); + +extern WORD32 isvce_populate_svc_nalu_extension(isvce_process_ctxt_t *ps_proc, + svc_nalu_ext_t *ps_svc_nalu_ext, + NAL_UNIT_TYPE_T nalu_type, UWORD8 u1_idr_flag); + +extern WORD32 isvce_generate_svc_nalu_extension(bitstrm_t *ps_bitstrm, + svc_nalu_ext_t *ps_svc_nalu_ext, UWORD8 u1_nalu_id); + +extern WORD32 isvce_populate_svc_slice(isvce_process_ctxt_t *ps_proc, + svc_slice_header_t *ps_svc_slice_hdr, pps_t *ps_pps, + subset_sps_t *ps_subset_sps, + svc_nalu_ext_t *ps_svc_nalu_ext); + +extern WORD32 isvce_populate_subset_sps(isvce_codec_t *ps_codec, subset_sps_t *ps_subset_sps, + UWORD8 u1_sps_id, isvce_inp_buf_t *ps_inp_buf, + UWORD8 u1_spatial_layer_id); + +extern WORD32 isvce_generate_prefix_nal(bitstrm_t *ps_bitstrm, svc_nalu_ext_t *ps_svc_nalu_ext, + slice_header_t *ps_slice_header, + UWORD8 u1_max_num_ref_frames, UWORD8 u1_num_spatial_layers); + +extern WORD32 isvce_generate_slice_header_svc(bitstrm_t *ps_bitstrm, pps_t *ps_pps, + svc_nalu_ext_t *ps_svc_nalu_ext, + svc_slice_header_t *ps_svc_slice_hdr, + subset_sps_t *ps_subset_sps); + +extern WORD32 isvce_generate_subset_sps(bitstrm_t *ps_bitstrm, subset_sps_t *ps_subset_sps); + +#endif diff --git a/encoder/svc/isvce_error.h b/encoder/svc/isvce_error.h new file mode 100644 index 0000000..fb4900d --- /dev/null +++ b/encoder/svc/isvce_error.h @@ -0,0 +1,70 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvce_error.h +* +* @brief +* SVC specific error codes +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_ERROR_H_ +#define _ISVCE_ERROR_H_ + +#include "ih264e_error.h" + +typedef enum ISVCE_ERRORS_T +{ + /**Invalid SVC params */ + IH264E_INVALID_SVC_PARAMS = IH264E_CODEC_ERROR_START + 0x100, + + /**Invalid num_temporal_layers */ + IH264E_INVALID_NUM_TEMPORAL_LAYERS = IH264E_CODEC_ERROR_START + 0x101, + + /**Invalid num_spatial_layers */ + IH264E_INVALID_NUM_SPATIAL_LAYERS = IH264E_CODEC_ERROR_START + 0x102, + + /**Invalid spatial_res_ratio */ + IH264E_INVALID_SPATIAL_RES_RATIO = IH264E_CODEC_ERROR_START + 0x103, + + /** Weighted prediction not supported */ + IH264E_WEIGHTED_PRED_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x104, + + /** CABAC entropy mode not supported for SVC */ + IH264E_CABAC_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x105, + + /**Invalid input dimensions */ + IH264E_INVALID_SVC_INPUT_DIMENSIONS = IH264E_CODEC_ERROR_START + 0x106, + + /** Invalid init QP */ + IH264E_INVALID_DYN_INIT_QP = IH264E_CODEC_ERROR_START + 0x107, + +} ISVCE_ERRORS_T; + +#endif diff --git a/encoder/svc/isvce_fmt_conv.c b/encoder/svc/isvce_fmt_conv.c new file mode 
100644 index 0000000..80d888b --- /dev/null +++ b/encoder/svc/isvce_fmt_conv.c @@ -0,0 +1,145 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_fmt_conv.c +* +* @brief +* Contains functions for format conversion or frame copy of output buffer +* +* @author +* ittiam +* +* @par List of Functions: +* - isvce_fmt_conv() +* +* @remarks +* None +* +******************************************************************************* +*/ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +/* Dependencies of ih264_buf_mgr.h */ +/* Dependencies of ih264_list.h */ +#include "ih264_error.h" +/* Dependencies of ih264_common_tables.h */ +#include "ih264_defs.h" +#include "ih264_structs.h" +#include "ih264_buf_mgr.h" +#include "ih264_common_tables.h" +#include "ih264_list.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +/* Dependencies of ih264e_cabac_structs.h */ +#include "ih264_cabac_tables.h" +/* Dependencies of ime_structs.h */ +#include "ime_defs.h" +#include "ime_distortion_metrics.h" +/* Dependencies of 
ih264e_structs.h */ +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +/* Dependencies of ih264e_bitstream.h */ +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ih264e_cabac_structs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ime_statistics.h" +#include "ime_structs.h" +/* Dependencies of 'ih264e_utils.h' */ +#include "ih264e_defs.h" +#include "ih264e_structs.h" +#include "ih264e_fmt_conv.h" +#include "isvce_structs.h" + +IH264E_ERROR_T isvce_fmt_conv(isvce_codec_t *ps_codec, svc_au_buf_t *ps_pic, UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, UWORD8 *pu1_v_dst, UWORD32 u4_dst_y_strd, + UWORD32 u4_dst_uv_strd, WORD32 cur_row, WORD32 num_rows) +{ + IH264E_ERROR_T ret = IH264E_SUCCESS; + UWORD8 *pu1_y_src, *pu1_uv_src; + UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp; + UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp; + WORD32 is_u_first; + UWORD8 *pu1_luma; + UWORD8 *pu1_chroma; + WORD32 wd; + + WORD32 src_y_strd; + WORD32 src_uv_strd; + + WORD32 layer_id = ps_pic->u1_num_spatial_layers - 1; + + if(0 == num_rows) + { + return ret; + } + + pu1_luma = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[0].pv_data; + pu1_chroma = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[1].pv_data; + + src_y_strd = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[0].i4_data_stride; + src_uv_strd = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[1].i4_data_stride; + + wd = ps_codec->s_cfg.u4_disp_wd; + is_u_first = (IV_YUV_420SP_UV == ps_codec->e_codec_color_format) ? 
1 : 0; + + /* In case of 420P output luma copy is disabled for shared mode */ + { + pu1_y_src = pu1_luma + cur_row * src_y_strd; + pu1_uv_src = pu1_chroma + (cur_row / 2) * src_uv_strd; + + pu1_y_dst_tmp = pu1_y_dst + cur_row * u4_dst_y_strd; + pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd; + pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd; + pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * u4_dst_uv_strd; + + /* If the call is non-blocking and there are no rows to be copied then + * return */ + /* In non-shared mode, reference buffers are in 420SP UV format, + * if output also is in 420SP_UV, then just copy + * if output is in 420SP_VU then swap UV values + */ + if((IV_YUV_420SP_UV == ps_codec->s_cfg.e_recon_color_fmt) || + (IV_YUV_420SP_VU == ps_codec->s_cfg.e_recon_color_fmt)) + { + ih264e_fmt_conv_420sp_to_420sp(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp, pu1_uv_dst_tmp, wd, + num_rows, ps_codec->i4_rec_strd, ps_codec->i4_rec_strd, + u4_dst_y_strd, u4_dst_uv_strd); + } + else if(IV_YUV_420P == ps_codec->s_cfg.e_recon_color_fmt) + { + ih264e_fmt_conv_420sp_to_420p(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp, pu1_u_dst_tmp, + pu1_v_dst_tmp, wd, num_rows, ps_codec->i4_rec_strd, + ps_codec->i4_rec_strd, u4_dst_y_strd, u4_dst_uv_strd, + is_u_first, 0); + } + } + return (ret); +} diff --git a/encoder/svc/isvce_fmt_conv.h b/encoder/svc/isvce_fmt_conv.h new file mode 100644 index 0000000..d8d0ccf --- /dev/null +++ b/encoder/svc/isvce_fmt_conv.h @@ -0,0 +1,48 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_fmt_conv.h +* +* @brief +* The file contains extern declarations of color space conversion routines +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_FMT_CONV_H_ +#define _ISVCE_FMT_CONV_H_ + +#include "ih264e_fmt_conv.h" +#include "isvce_structs.h" + +IH264E_ERROR_T isvce_fmt_conv(isvce_codec_t *ps_codec, svc_au_buf_t *ps_pic, UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, UWORD8 *pu1_v_dst, UWORD32 u4_dst_y_strd, + UWORD32 u4_dst_uv_strd, WORD32 cur_row, WORD32 num_rows); + +#endif diff --git a/encoder/svc/isvce_function_selector_generic.c b/encoder/svc/isvce_function_selector_generic.c new file mode 100644 index 0000000..044bbeb --- /dev/null +++ b/encoder/svc/isvce_function_selector_generic.c @@ -0,0 +1,314 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_function_selector_generic.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* ittiam +* +* @par List of Functions: +* - isvce_init_function_ptr_generic +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include +#include +#include +#include + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvc_defs.h" +#include "ih264_size_defs.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include 
"isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "ih264e_platform_macros.h" +#include "isvce_cabac.h" +#include "isvce_core_coding.h" +#include "ih264_cavlc_tables.h" +#include "isvce_cavlc.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_half_pel.h" +#include "isvce_me.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec) +{ + WORD32 i = 0; + + /* curr proc ctxt */ + isvce_process_ctxt_t *ps_proc = NULL; + isvce_me_ctxt_t *ps_me_ctxt = NULL; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns; + inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns; + mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert; + ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz; + ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc; + ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + 
ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert; + ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz; + ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc; + ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl; + ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr; + ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r; + ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d; + ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l; + ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 8x8 */ + ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert; + ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc; + ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl; + ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr; + ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r; + ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d; + ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l; + ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc; + ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz; + ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert; + ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane; + + /* Init luma forward transform fn ptr */ + ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_8x8) / + sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0])) == + NUM_RESI_TRANS_QUANT_VARIANTS); + ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_4x4) / + 
sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0])) == + NUM_RESI_TRANS_QUANT_VARIANTS); + ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4) / + sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0])) == + NUM_RESI_TRANS_QUANT_VARIANTS); + + ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8; + ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4; + ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4; + ps_enc_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8; + ps_enc_loop_fxns->apf_resi_trans_quant_4x4[1] = isvc_resi_trans_quant_4x4; + ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] = isvc_resi_trans_quant_chroma_4x4; + ps_enc_loop_fxns->pf_hadamard_quant_4x4 = isvc_hadamard_quant_4x4; + ps_enc_loop_fxns->pf_hadamard_quant_2x2_uv = isvc_hadamard_quant_2x2_uv; + + /* Init inverse transform fn ptr */ + ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8) / + sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0])) == NUM_IQ_IT_RECON_VARIANTS); + ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4) / + sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0])) == NUM_IQ_IT_RECON_VARIANTS); + ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc) / + sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0])) == + NUM_IQ_IT_RECON_VARIANTS); + ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4) / + sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0])) == + NUM_IQ_IT_RECON_VARIANTS); + ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc) / + sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0])) == + NUM_IQ_IT_RECON_VARIANTS); + + ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] = isvc_iquant_itrans_recon_4x4; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] = 
isvc_iquant_itrans_recon_4x4_dc; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] = isvc_iquant_itrans_recon_chroma_4x4; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] = + isvc_iquant_itrans_recon_chroma_4x4_dc; + ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] = isvc_iquant_itrans_recon_4x4; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] = isvc_iquant_itrans_recon_4x4_dc; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] = isvc_iquant_itrans_recon_chroma_4x4; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] = + isvc_iquant_itrans_recon_chroma_4x4_dc; + ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] = isvc_iquant_itrans_recon_chroma_4x4; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] = + isvc_iquant_itrans_recon_chroma_4x4_dc; + ps_enc_loop_fxns->pf_zcbf_iquant_itrans_recon_4x4 = isvc_zcbf_iquant_itrans_recon_4x4; + ps_enc_loop_fxns->pf_chroma_zcbf_iquant_itrans_recon_4x4 = + isvc_chroma_zcbf_iquant_itrans_recon_4x4; + + ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4; + ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv; + + /* Init fn ptr luma core coding */ + ps_enc_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16; + ps_enc_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4; + ps_enc_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16; + + /* Init fn ptr chroma core coding */ + ps_enc_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8; + 
ps_enc_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8; + + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4; + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4; + + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4; + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4; + + /* write mb syntax layer */ + ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac; + ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac; + ps_codec->pf_write_mb_syntax_layer[CABAC][BSLICE] = isvce_write_bslice_mb_cabac; + + /* Padding Functions */ + ps_codec->pf_pad_top = ih264_pad_top; + ps_codec->pf_pad_bottom = ih264_pad_bottom; + ps_codec->pf_pad_left_luma = ih264_pad_left_luma; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma; + + /* Inter pred leaf level functions */ + ps_inter_pred_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy; + ps_inter_pred_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz; + ps_inter_pred_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert; + ps_inter_pred_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear; + ps_inter_pred_fxns->pf_inter_pred_chroma = 
ih264_inter_pred_chroma; + + /* sad me level functions */ + ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16; + ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast; + ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8; + + /* memory handling operations */ + ps_mem_fxns->pf_mem_cpy = ih264_memcpy; + ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8; + ps_mem_fxns->pf_mem_set = ih264_memset; + ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8; + ps_mem_fxns->pf_copy_2d = isvc_copy_2d; + ps_mem_fxns->pf_memset_2d = isvc_memset_2d; + ps_mem_fxns->pf_16bit_interleaved_copy = isvc_16bit_interleaved_copy; + ps_mem_fxns->pf_16bit_interleaved_memset = isvc_16bit_interleaved_memset; + ps_mem_fxns->pf_nonzero_checker = isvc_is_nonzero_blk; + + /* sad me level functions */ + for(i = 0; i < (MAX_PROCESS_CTXT); i++) + { + ps_proc = &ps_codec->as_process[i]; + + ps_me_ctxt = &ps_proc->s_me_ctxt; + ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16; + ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast; + ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8; + ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog; + ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog; + ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog; + ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16; + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter; + } + + /* intra mode eval -encoder level function */ + ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes; + ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes; + ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes; + + /* csc */ + ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp; + ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp; + + /* Halp pel generation 
function - encoder level*/ + ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz; + ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert; + + /* ME compute */ + ps_codec->apf_compute_me[PSLICE] = &isvce_compute_me_single_reflist; + ps_codec->apf_compute_me[BSLICE] = &isvce_compute_me_multi_reflist; + + /* skip decision */ + ps_codec->apf_find_skip_params_me[PSLICE] = &isvce_find_pskip_params_me; + ps_codec->apf_find_skip_params_me[BSLICE] = &isvce_find_bskip_params_me; +} diff --git a/encoder/svc/isvce_globals.c b/encoder/svc/isvce_globals.c new file mode 100644 index 0000000..966c5e8 --- /dev/null +++ b/encoder/svc/isvce_globals.c @@ -0,0 +1,48 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvce_globals.c +* +* @brief +* Contains definitions of global variables used across the encoder +* +* @author +* ittiam +* +* @par List of functions +* +* +* @remarks +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +#include "ih264_typedefs.h" +#include "ih264_defs.h" + +/* Raster to z scan map */ +const UWORD8 gau1_raster_to_zscan_map[MAX_TU_IN_MB] = {0, 1, 4, 5, 2, 3, 6, 7, + 8, 9, 12, 13, 10, 11, 14, 15}; diff --git a/encoder/svc/isvce_globals.h b/encoder/svc/isvce_globals.h new file mode 100644 index 0000000..6e04a50 --- /dev/null +++ b/encoder/svc/isvce_globals.h @@ -0,0 +1,44 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_globals.h +* +* @brief +* Contains declarations of global variables for H264 encoder +* +* @author +* Ittiam +* +* @remarks +* +******************************************************************************* +*/ + +#ifndef _ISVCE_GLOBALS_H_ +#define _ISVCE_GLOBALS_H_ + +#include "ih264e_globals.h" + +extern const UWORD8 gau1_raster_to_zscan_map[MAX_TU_IN_MB]; + +#endif diff --git a/encoder/svc/isvce_ibl_eval.c b/encoder/svc/isvce_ibl_eval.c new file mode 100644 index 0000000..cb3d493 --- /dev/null +++ b/encoder/svc/isvce_ibl_eval.c @@ -0,0 +1,1378 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_ibl_eval.c +* +* @brief +* Contains functions used for SVC intra prediction +* +******************************************************************************* +*/ +#include +#include +#include + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "isvc_macros.h" +#include "ih264_debug.h" +#include "ih264_padding.h" +#include "isvce_defs.h" +#include "isvce_ibl_private_defs.h" +#include "isvce_ibl_eval.h" +#include "isvce_utils.h" +#include "isvc_intra_resample.h" +#include "isvc_defs.h" + +static FORCEINLINE WORD32 isvce_get_num_mb_states(UWORD32 u4_wd, UWORD32 u4_ht) +{ + return (u4_wd / MB_SIZE) * (u4_ht / MB_SIZE); +} + +static FORCEINLINE WORD32 isvce_get_phase_array_size(DOUBLE d_spatial_res_ratio, bool b_is_chroma) +{ + return (2 == d_spatial_res_ratio) ? (b_is_chroma ? 3 : 0) : 5; +} + +/** +******************************************************************************* +* +* @brief +* Returns size of buffers for storing residual pred ctxt +* +* @param[in] u1_num_spatial_layers +* Num Spatial Layers +* +* @param[in] d_spatial_res_ratio +* Resolution Ratio b/w spatial layers +* +* @param[in] u4_wd +* Input Width +* +* @param[in] u4_ht +* Input Height +* +* @returns Size of buffers +* +******************************************************************************* +*/ +UWORD32 isvce_get_svc_intra_pred_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, + UWORD32 u4_wd, UWORD32 u4_ht) +{ + WORD32 i, j; + + UWORD32 u4_size = 0; + + if(u1_num_spatial_layers > 1) + { + u4_size += MAX_PROCESS_CTXT * sizeof(svc_intra_pred_ctxt_t); + u4_size += MAX_PROCESS_CTXT * sizeof(intra_pred_state_t); + u4_size += MAX_PROCESS_CTXT * u1_num_spatial_layers * sizeof(intra_pred_layer_state_t); + + for(i = u1_num_spatial_layers - 1; i >= 0; i--) + { + WORD32 i4_layer_luma_wd = + (WORD32) ((DOUBLE) u4_wd / + pow(d_spatial_res_ratio, 
u1_num_spatial_layers - 1 - i)) + + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; + WORD32 i4_layer_wd_mbs = i4_layer_luma_wd / MB_SIZE; + WORD32 i4_layer_ht_mbs = i4_layer_luma_ht / MB_SIZE; + /*Add PAD Mbs */ + WORD32 i4_layer_luma_mbs = + ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2); + WORD32 i4_num_mb_states = isvce_get_num_mb_states(i4_layer_luma_wd, i4_layer_luma_ht); + + for(j = 0; j < NUM_SP_COMPONENTS; j++) + { + bool b_is_chroma = ((COMPONENT_TYPE) j) != Y; + + u4_size += i4_num_mb_states * sizeof(intra_pred_mb_state_t); + + /* pi4_ref_array_positions_x */ + u4_size += MAX_REF_ARR_WD_HT * i4_layer_wd_mbs * sizeof(WORD32); + + /* pi4_ref_array_positions_y */ + u4_size += (i4_layer_ht_mbs >> b_is_chroma) * i4_layer_ht_mbs * sizeof(WORD32); + + /* ps_ref_array_phases */ + u4_size += isvce_get_phase_array_size(d_spatial_res_ratio, b_is_chroma) * + sizeof(coordinates_t); + } + + /* pi1_mb_mode */ + u4_size += i4_layer_luma_mbs * sizeof(WORD8); + + /* pu1_refarray_buffer */ + u4_size += MAX_PROCESS_CTXT * TEMP_BUF_SIZE_LUMA * sizeof(UWORD8); + + /* pu1_refarray_cb, pu1_refarray_cr */ + u4_size += MAX_PROCESS_CTXT * (TEMP_BUF_SIZE_CB + TEMP_BUF_SIZE_CR) * sizeof(UWORD8); + + /* pi4_temp_interpolation_buffer */ + u4_size += MAX_PROCESS_CTXT * TEMP_INTERPOLATION_BUF_SIZE * sizeof(WORD32); + } + + /* intra_pred_outputs_t.s_pred_buf */ + u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(UWORD8); + + u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(UWORD8); + } + + return u4_size; +} + +static FORCEINLINE WORD32 isvce_get_scaled_pixel_pos(layer_resampler_props_t *ps_layer_props, + WORD32 i4_pixel_pos, UWORD8 u1_dim_id) +{ + if(1 == u1_dim_id) + { + return (((i4_pixel_pos - ps_layer_props->i4_offset_y) * + ((WORD64) ps_layer_props->u4_scale_y) + + ps_layer_props->i4_add_y) >> + (ps_layer_props->u4_shift_y - 4)) - + ps_layer_props->i4_delta_y; + } + else + 
{ + return (((i4_pixel_pos - ps_layer_props->i4_offset_x) * + ((WORD64) ps_layer_props->u4_scale_x) + + ps_layer_props->i4_add_x) >> + (ps_layer_props->u4_shift_x - 4)) - + ps_layer_props->i4_delta_x; + } +} + +static FORCEINLINE void isvce_ref_array_pos_init( + layer_resampler_props_t *ps_layer_props, intra_pred_mb_state_t *ps_mb_state, + coordinates_t *ps_mb_pos, DOUBLE d_spatial_res_ratio, UWORD8 u1_frame_mbs_only_flag, + UWORD8 u1_field_mb_flag, UWORD8 u1_ref_layer_frame_mbs_only_flag) +{ + if(1.5 == d_spatial_res_ratio) + { + UWORD32 i; + + WORD32 *pi4_ref_array_positions_x = ps_mb_state->pi4_ref_array_positions_x; + WORD32 *pi4_ref_array_positions_y = ps_mb_state->pi4_ref_array_positions_y; + WORD32 i4_x_offset = ps_mb_state->s_offsets.i4_abscissa; + WORD32 i4_y_offset = ps_mb_state->s_offsets.i4_ordinate; + + if(0 == ps_mb_pos->i4_abscissa) + { + for(i = 0; i < ps_layer_props->u4_mb_ht; i++) + { + WORD32 i4_y_ref16; + + WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i; + + if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) + { + i4_yc = i4_yc >> (1 - u1_field_mb_flag); + } + + i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1); + + pi4_ref_array_positions_y[i] = (i4_y_ref16 >> 4) - i4_y_offset; + } + } + + if(0 == ps_mb_pos->i4_ordinate) + { + for(i = 0; i < MAX_REF_ARR_WD_HT; i++) + { + WORD32 i4_x_ref16; + + WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + i; + + i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0); + + pi4_ref_array_positions_x[i] = (i4_x_ref16 >> 4) - i4_x_offset; + } + } + } +} + +static FORCEINLINE void isvce_ref_array_phase_init( + layer_resampler_props_t *ps_layer_props, intra_pred_mb_state_t *ps_mb_state, + coordinates_t *ps_mb_pos, DOUBLE d_spatial_res_ratio, UWORD8 u1_frame_mbs_only_flag, + UWORD8 u1_field_mb_flag, UWORD8 u1_ref_layer_frame_mbs_only_flag) +{ + UWORD32 i, j; + + coordinates_t *ps_ref_array_phases = 
ps_mb_state->ps_ref_array_phases; + + WORD32 i4_x_offset = ps_mb_state->s_offsets.i4_abscissa; + WORD32 i4_y_offset = ps_mb_state->s_offsets.i4_ordinate; + UWORD32 u4_phase_array_idx = 0; + + if(1.5 == d_spatial_res_ratio) + { + for(i = 0; i < 3; i++) + { + WORD32 i4_y_ref16; + + WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i; + + if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) + { + i4_yc = i4_yc >> (1 - u1_field_mb_flag); + } + + i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1); + + for(j = 0; j < ((0 == i) ? 3 : 1); j++) + { + WORD32 i4_x_ref16; + + WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + j; + + i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0); + + ps_ref_array_phases[u4_phase_array_idx].i4_abscissa = i4_x_ref16 & 15; + ps_ref_array_phases[u4_phase_array_idx].i4_ordinate = i4_y_ref16 & 15; + + u4_phase_array_idx++; + } + } + } + else + { + for(i = 0; i < 2; i++) + { + WORD32 i4_y_ref16; + + WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i; + + if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) + { + i4_yc = i4_yc >> (1 - u1_field_mb_flag); + } + + i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1); + + for(j = 0; j < ((0 == i) ? 
2 : 1); j++) + { + WORD32 i4_x_ref16; + + WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + j; + + i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0); + + ps_ref_array_phases[u4_phase_array_idx].i4_abscissa = + (i4_x_ref16 - (16 * i4_x_offset)) & 15; + ps_ref_array_phases[u4_phase_array_idx].i4_ordinate = + (i4_y_ref16 - (16 * i4_y_offset)) & 15; + + u4_phase_array_idx++; + } + } + } +} + +static FORCEINLINE void isvce_set_mb_states(layer_resampler_props_t *ps_layer_props, + intra_pred_mb_state_t *ps_mb_states, + coordinates_t *ps_mb_pos, DOUBLE d_spatial_res_ratio, + UWORD32 u4_wd_in_mbs, bool b_is_chroma) +{ + WORD32 i4_x_refmin16; + WORD32 i4_x_refmax16; + WORD32 i4_y_refmin16; + WORD32 i4_y_refmax16; + WORD32 i4_x_offset, i4_y_offset; + + const UWORD8 u1_frame_mbs_only_flag = 1; + const UWORD8 u1_ref_layer_frame_mbs_only_flag = 1; + const UWORD8 u1_field_mb_flag = 0; + + i4_x_refmin16 = isvce_get_scaled_pixel_pos( + ps_layer_props, ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd, 0); + i4_x_refmax16 = isvce_get_scaled_pixel_pos( + ps_layer_props, + ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + ps_layer_props->u4_mb_wd - 1, 0); + + i4_y_refmin16 = isvce_get_scaled_pixel_pos( + ps_layer_props, ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht, 1); + i4_y_refmax16 = isvce_get_scaled_pixel_pos( + ps_layer_props, + ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + ps_layer_props->u4_mb_ht - 1, 1); + + i4_x_offset = (i4_x_refmin16 >> 4); + i4_y_offset = (i4_y_refmin16 >> 4); + + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_offsets.i4_abscissa = i4_x_offset; + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_offsets.i4_ordinate = i4_y_offset; + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_ref_array_dims.i4_abscissa = (((i4_x_refmax16 + 15) >> 8) << 4) + + ((WORD32) (ps_layer_props->u4_mb_wd >> 1)) - i4_x_offset + + 16; + 
ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_ref_array_dims.i4_ordinate = (((i4_y_refmax16 + 15) >> 8) << 4) + + ((WORD32) (ps_layer_props->u4_mb_ht >> 1)) - i4_y_offset + + 16; + + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_max_pos.i4_abscissa = ((i4_x_refmax16 + 15) >> 4) - i4_x_offset; + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_max_pos.i4_ordinate = ((i4_y_refmax16 + 15) >> 4) - i4_y_offset; + + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_min_pos.i4_abscissa = (i4_x_refmin16 >> 4) - i4_x_offset; + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_min_pos.i4_ordinate = (i4_y_refmin16 >> 4) - i4_y_offset; + + if((1.5 == d_spatial_res_ratio) && + ((0 == ps_mb_pos->i4_abscissa) || (0 == ps_mb_pos->i4_ordinate))) + { + WORD32 i4_min, i4_max, i4_xr_index, i4_yr_index, i4_ref_array_wd, i4_ref_array_ht; + + i4_x_offset = i4_x_offset - 2; + i4_ref_array_wd = ((i4_x_refmax16 + 15) >> 4) - (i4_x_refmin16 >> 4) + 1 + 4; + + i4_min = i4_x_offset; + i4_xr_index = i4_min - ((i4_min / (WORD32) ps_layer_props->u4_mb_wd) * + (WORD32) ps_layer_props->u4_mb_wd); + + if(i4_xr_index < (WORD32) (ps_layer_props->u4_mb_wd >> 1)) + { + i4_ref_array_wd = i4_ref_array_wd + (ps_layer_props->u4_mb_wd >> 1); + i4_x_offset = i4_x_offset - ((WORD32) (ps_layer_props->u4_mb_wd >> 1)); + } + + i4_max = ((i4_x_refmax16 + 15) >> 4) + 2; + i4_xr_index = i4_max - ((i4_max / (WORD32) ps_layer_props->u4_mb_wd) * + (WORD32) ps_layer_props->u4_mb_wd); + + if(i4_xr_index >= (WORD32) (ps_layer_props->u4_mb_wd >> 1)) + { + i4_ref_array_wd = i4_ref_array_wd + (ps_layer_props->u4_mb_wd >> 1); + } + + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_ref_array_dims.i4_abscissa = i4_ref_array_wd; + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + 
.s_offsets.i4_abscissa = i4_x_offset; + + i4_ref_array_ht = ((i4_y_refmax16 + 15) >> 4) - (i4_y_refmin16 >> 4) + 1 + 4; + + i4_y_offset = (i4_y_refmin16 >> 4) - 2; + + i4_min = i4_y_offset; + + i4_yr_index = i4_min - ((i4_min / (WORD32) ps_layer_props->u4_mb_ht) * + (WORD32) ps_layer_props->u4_mb_ht); + + if(i4_yr_index < (WORD32) (ps_layer_props->u4_mb_ht >> 1)) + { + i4_ref_array_ht = i4_ref_array_ht + (ps_layer_props->u4_mb_ht >> 1); + i4_y_offset = i4_y_offset - ((WORD32) (ps_layer_props->u4_mb_ht >> 1)); + } + + i4_max = ((i4_y_refmax16 + 15) >> 4) + 2; + i4_yr_index = i4_max - ((i4_max / (WORD32) ps_layer_props->u4_mb_ht) * + (WORD32) ps_layer_props->u4_mb_ht); + + if(i4_yr_index >= (WORD32) (ps_layer_props->u4_mb_ht >> 1)) + { + i4_ref_array_ht = i4_ref_array_ht + (ps_layer_props->u4_mb_ht >> 1); + } + + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_ref_array_dims.i4_ordinate = i4_ref_array_ht; + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_offsets.i4_ordinate = i4_y_offset; + + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_max_pos.i4_abscissa = ((i4_x_refmax16 + 15) >> 4) - i4_x_offset; + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_max_pos.i4_ordinate = ((i4_y_refmax16 + 15) >> 4) - i4_y_offset; + + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_min_pos.i4_abscissa = (i4_x_refmin16 >> 4) - i4_x_offset; + ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs] + .s_min_pos.i4_ordinate = (i4_y_refmin16 >> 4) - i4_y_offset; + + isvce_ref_array_pos_init( + ps_layer_props, + &ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs], + ps_mb_pos, d_spatial_res_ratio, u1_frame_mbs_only_flag, u1_field_mb_flag, + u1_ref_layer_frame_mbs_only_flag); + + isvce_ref_array_phase_init( + ps_layer_props, + &ps_mb_states[ps_mb_pos->i4_abscissa + 
ps_mb_pos->i4_ordinate * u4_wd_in_mbs], + ps_mb_pos, d_spatial_res_ratio, u1_frame_mbs_only_flag, u1_field_mb_flag, + u1_ref_layer_frame_mbs_only_flag); + } + else if((2. == d_spatial_res_ratio) && + ((0 == ps_mb_pos->i4_abscissa) && (0 == ps_mb_pos->i4_ordinate) && b_is_chroma)) + { + isvce_ref_array_pos_init( + ps_layer_props, + &ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs], + ps_mb_pos, d_spatial_res_ratio, u1_frame_mbs_only_flag, u1_field_mb_flag, + u1_ref_layer_frame_mbs_only_flag); + + isvce_ref_array_phase_init( + ps_layer_props, + &ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * u4_wd_in_mbs], + ps_mb_pos, d_spatial_res_ratio, u1_frame_mbs_only_flag, u1_field_mb_flag, + u1_ref_layer_frame_mbs_only_flag); + } +} + +static void isvce_ibl_layer_state_init(intra_pred_layer_state_t *ps_layer_state, + DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht, + UWORD8 u1_level_idc, IV_COLOR_FORMAT_T e_color_format) +{ + UWORD32 i, j, k; + + const UWORD8 u1_ref_layer_field_pic_flag = 0; + const UWORD8 u1_field_pic_flag = 0; + const UWORD8 u1_frame_mbs_only_flag = 1; + const UWORD8 u1_ref_layer_frame_mbs_only_flag = 1; + const UWORD8 u1_bot_field_flag = 0; + const WORD32 i4_scaled_ref_layer_left_offset = 0; + const WORD32 i4_scaled_ref_layer_top_offset = 0; + const WORD32 i4_ref_layer_chroma_phase_x_plus1 = 1; + const WORD32 i4_ref_layer_chroma_phase_y_plus1 = 1; + const WORD32 i4_chroma_phase_x_plus1 = 1; + const WORD32 i4_chroma_phase_y_plus1 = 1; + const WORD32 i4_sub_wd_chroma = 2; + const WORD32 i4_sub_ht_chroma = 2; + + ASSERT((IV_YUV_420P == e_color_format) || (IV_YUV_420SP_UV == e_color_format)); + + UNUSED(e_color_format); + + for(i = 0; i < NUM_SP_COMPONENTS; i++) + { + intra_pred_mb_state_t *ps_mb_states; + layer_resampler_props_t *ps_layer_props; + + UWORD32 u4_wd_in_mbs; + UWORD32 u4_ht_in_mbs; + + UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i)); + UWORD32 u4_ref_wd = (u4_wd / d_spatial_res_ratio); + 
UWORD32 u4_ref_ht = (u4_ht / d_spatial_res_ratio) * (1 + u1_ref_layer_field_pic_flag); + UWORD32 u4_scaled_wd = u4_wd; + UWORD32 u4_scaled_ht = u4_ht * (1 + u1_field_pic_flag); + + ps_mb_states = + u1_is_chroma ? ps_layer_state->ps_chroma_mb_states : ps_layer_state->ps_luma_mb_states; + ps_layer_props = + u1_is_chroma ? ps_layer_state->ps_chroma_props : ps_layer_state->ps_luma_props; + + u4_ref_wd = u4_ref_wd >> u1_is_chroma; + u4_ref_ht = u4_ref_ht >> u1_is_chroma; + u4_scaled_wd = u4_scaled_wd >> u1_is_chroma; + u4_scaled_ht = u4_scaled_ht >> u1_is_chroma; + + if(u1_is_chroma) + { + ps_layer_props->i4_refphase_x = i4_ref_layer_chroma_phase_x_plus1 - 1; + ps_layer_props->i4_refphase_y = i4_ref_layer_chroma_phase_y_plus1 - 1; + ps_layer_props->i4_phase_x = i4_chroma_phase_x_plus1 - 1; + ps_layer_props->i4_phase_y = i4_chroma_phase_y_plus1 - 1; + ps_layer_props->u4_sub_wd = i4_sub_wd_chroma; + ps_layer_props->u4_sub_ht = i4_sub_ht_chroma; + ps_layer_props->u4_mb_wd = MB_SIZE >> 1; + ps_layer_props->u4_mb_ht = MB_SIZE >> 1; + } + else + { + ps_layer_props->i4_refphase_x = 0; + ps_layer_props->i4_refphase_y = 0; + ps_layer_props->i4_phase_x = 0; + ps_layer_props->i4_phase_y = 0; + ps_layer_props->u4_sub_wd = 1; + ps_layer_props->u4_sub_ht = 1; + ps_layer_props->u4_mb_wd = MB_SIZE; + ps_layer_props->u4_mb_ht = MB_SIZE; + } + + u4_wd_in_mbs = u4_scaled_wd / ps_layer_props->u4_mb_wd; + u4_ht_in_mbs = u4_scaled_ht / ps_layer_props->u4_mb_ht; + + if(u1_level_idc <= 30) + { + ps_layer_props->u4_shift_x = 16; + ps_layer_props->u4_shift_y = 16; + } + else + { + ps_layer_props->u4_shift_x = 31 - isvcd_get_ceil_log2(u4_ref_wd); + ps_layer_props->u4_shift_y = 31 - isvcd_get_ceil_log2(u4_ref_ht); + } + + if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) + { + if(1 == u1_ref_layer_frame_mbs_only_flag) + { + ps_layer_props->i4_phase_y = ps_layer_props->i4_phase_y + (4 * u1_bot_field_flag) + + 3 - ps_layer_props->u4_sub_ht; + ps_layer_props->i4_refphase_y 
= (2 * ps_layer_props->i4_refphase_y) + 2; + } + else + { + ps_layer_props->i4_phase_y = ps_layer_props->i4_phase_y + 4 * u1_bot_field_flag; + ps_layer_props->i4_refphase_y = + ps_layer_props->i4_refphase_y + (4 * u1_bot_field_flag); + } + } + + ps_layer_props->u4_scale_x = + ((u4_ref_wd << ps_layer_props->u4_shift_x) + (u4_scaled_wd >> 1)) / (u4_scaled_wd); + ps_layer_props->u4_scale_y = + ((u4_ref_ht << ps_layer_props->u4_shift_y) + (u4_scaled_ht >> 1)) / (u4_scaled_ht); + + ps_layer_props->i4_offset_x = i4_scaled_ref_layer_left_offset / ps_layer_props->u4_sub_wd; + ps_layer_props->i4_add_x = + (((u4_ref_wd * (2 + ps_layer_props->i4_phase_x)) << (ps_layer_props->u4_shift_x - 2)) + + (u4_scaled_wd >> 1)) / + u4_scaled_wd + + (1 << (ps_layer_props->u4_shift_x - 5)); + ps_layer_props->i4_delta_x = 4 * (2 + ps_layer_props->i4_refphase_x); + + if((1 == u1_frame_mbs_only_flag) && (1 == u1_ref_layer_frame_mbs_only_flag)) + { + ps_layer_props->i4_offset_y = + i4_scaled_ref_layer_top_offset / ps_layer_props->u4_sub_ht; + ps_layer_props->i4_add_y = (((u4_ref_ht * (2 + ps_layer_props->i4_phase_y)) + << (ps_layer_props->u4_shift_y - 2)) + + (u4_scaled_ht >> 1)) / + u4_scaled_ht + + (1 << (ps_layer_props->u4_shift_y - 5)); + ps_layer_props->i4_delta_y = 4 * (2 + ps_layer_props->i4_refphase_y); + } + else + { + ps_layer_props->i4_offset_y = + i4_scaled_ref_layer_top_offset / (2 * ps_layer_props->u4_sub_ht); + ps_layer_props->i4_add_y = (((u4_ref_ht * (2 + ps_layer_props->i4_phase_y)) + << (ps_layer_props->u4_shift_y - 3)) + + (u4_scaled_ht >> 1)) / + u4_scaled_ht + + (1 << (ps_layer_props->u4_shift_y - 5)); + ps_layer_props->i4_delta_y = 2 * (2 + ps_layer_props->i4_refphase_y); + } + + for(j = 0; j < u4_ht_in_mbs; j++) + { + for(k = 0; k < u4_wd_in_mbs; k++) + { + coordinates_t s_mb_pos = {k, j}; + + isvce_set_mb_states(ps_layer_props, ps_mb_states, &s_mb_pos, d_spatial_res_ratio, + u4_wd_in_mbs, u1_is_chroma); + } + } + } +} + +/** 
+******************************************************************************* +* +* @brief +* Function to initialize svc ilp buffers +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_mem_rec +* Pointer to memory allocated for input buffers +* +******************************************************************************* +*/ +void isvce_intra_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec) +{ + intra_pred_state_t *ps_intra_pred_state; + svc_intra_pred_ctxt_t *ps_intra_pred_ctxt; + intra_pred_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS]; + intra_pred_mb_state_t *aps_chroma_mb_states[MAX_NUM_SPATIAL_LAYERS]; + + WORD32 i, j, k, l, m; + WORD8 *api4_mb_modes[MAX_NUM_SPATIAL_LAYERS]; + + isvce_process_ctxt_t *ps_proc = ps_codec->as_process; + + const WORD32 i4_num_proc_ctxts = sizeof(ps_codec->as_process) / sizeof(ps_codec->as_process[0]); + DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio; + UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; + UWORD32 u4_wd = ps_codec->s_cfg.u4_wd; + UWORD32 u4_ht = ps_codec->s_cfg.u4_ht; + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + WORD64 i8_alloc_mem_size = isvce_get_svc_intra_pred_ctxt_size( + u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht); + + if(u1_num_spatial_layers > 1) + { + for(j = 0; j < i4_num_proc_ctxts; j++) + { + ps_proc = &ps_codec->as_process[j]; + ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt = (svc_intra_pred_ctxt_t *) pu1_buf; + pu1_buf += sizeof(svc_intra_pred_ctxt_t); + i8_alloc_mem_size -= sizeof(svc_intra_pred_ctxt_t); + + ps_intra_pred_ctxt->s_intra_pred_constants.pv_state = pu1_buf; + ps_intra_pred_state = (intra_pred_state_t *) pu1_buf; + pu1_buf += sizeof(intra_pred_state_t); + i8_alloc_mem_size -= sizeof(intra_pred_state_t); + + ps_intra_pred_state->ps_layer_state = (intra_pred_layer_state_t *) pu1_buf; + pu1_buf += u1_num_spatial_layers * 
sizeof(ps_intra_pred_state->ps_layer_state[0]); + i8_alloc_mem_size -= + u1_num_spatial_layers * sizeof(ps_intra_pred_state->ps_layer_state[0]); + + ASSERT(i8_alloc_mem_size >= 0); + + for(i = u1_num_spatial_layers - 1; i >= 0; i--) + { + intra_pred_layer_state_t *ps_layer_state = &ps_intra_pred_state->ps_layer_state[i]; + + WORD32 i4_layer_luma_wd = + ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + + 0.99; + WORD32 i4_layer_wd_mbs = i4_layer_luma_wd / MB_SIZE; + WORD32 i4_layer_ht_mbs = i4_layer_luma_ht / MB_SIZE; + /* Add PAD MBs on all directions */ + WORD32 i4_layer_luma_mbs = + ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2); + WORD32 i4_num_mb_states = + isvce_get_num_mb_states(i4_layer_luma_wd, i4_layer_luma_ht); + + if(0 == j) + { + UWORD32 au4_ref_xpos_array_size[NUM_SP_COMPONENTS]; + UWORD32 au4_ref_ypos_array_size[NUM_SP_COMPONENTS]; + UWORD32 au4_ref_phase_array_size[NUM_SP_COMPONENTS]; + + for(k = 0; k < NUM_SP_COMPONENTS; k++) + { + bool b_is_chroma = ((COMPONENT_TYPE) k) != Y; + + au4_ref_xpos_array_size[k] = MAX_REF_ARR_WD_HT; + au4_ref_ypos_array_size[k] = (i4_layer_ht_mbs >> b_is_chroma); + au4_ref_phase_array_size[k] = + isvce_get_phase_array_size(d_spatial_res_ratio, b_is_chroma); + } + + ps_layer_state->ps_luma_mb_states = (intra_pred_mb_state_t *) pu1_buf; + aps_luma_mb_states[i] = ps_layer_state->ps_luma_mb_states; + pu1_buf += i4_num_mb_states * sizeof(ps_layer_state->ps_luma_mb_states[0]); + i8_alloc_mem_size -= + i4_num_mb_states * sizeof(ps_layer_state->ps_luma_mb_states[0]); + + ps_layer_state->ps_chroma_mb_states = (intra_pred_mb_state_t *) pu1_buf; + aps_chroma_mb_states[i] = ps_layer_state->ps_chroma_mb_states; + pu1_buf += i4_num_mb_states * sizeof(ps_layer_state->ps_chroma_mb_states[0]); + i8_alloc_mem_size -= + i4_num_mb_states * 
sizeof(ps_layer_state->ps_chroma_mb_states[0]); + + if(1.5 == d_spatial_res_ratio) + { + for(k = 0; k < NUM_SP_COMPONENTS; k++) + { + bool b_is_chroma = ((COMPONENT_TYPE) k) != Y; + + WORD32 *pi4_ref_array_positions_x = (WORD32 *) pu1_buf; + WORD32 *pi4_ref_array_positions_y = + pi4_ref_array_positions_x + MAX_REF_ARR_WD_HT * i4_layer_wd_mbs; + coordinates_t *ps_ref_array_phases = + (coordinates_t *) (pi4_ref_array_positions_y + + (i4_layer_ht_mbs >> b_is_chroma) * + i4_layer_ht_mbs); + intra_pred_mb_state_t *ps_mb_state = + b_is_chroma ? ps_layer_state->ps_chroma_mb_states + : ps_layer_state->ps_luma_mb_states; + + for(l = 0; l < i4_layer_ht_mbs; l++) + { + for(m = 0; m < i4_layer_wd_mbs; m++) + { + ps_mb_state[l * i4_layer_wd_mbs + m].pi4_ref_array_positions_x = + pi4_ref_array_positions_x + m * au4_ref_xpos_array_size[k]; + ps_mb_state[l * i4_layer_wd_mbs + m].pi4_ref_array_positions_y = + pi4_ref_array_positions_y + l * au4_ref_ypos_array_size[k]; + + ps_mb_state[l * i4_layer_wd_mbs + m].ps_ref_array_phases = + ps_ref_array_phases; + } + } + + pu1_buf += i4_layer_wd_mbs * au4_ref_xpos_array_size[k] * + sizeof(pi4_ref_array_positions_x[0]); + pu1_buf += i4_layer_ht_mbs * au4_ref_ypos_array_size[k] * + sizeof(pi4_ref_array_positions_y[0]); + pu1_buf += au4_ref_phase_array_size[k] * sizeof(ps_ref_array_phases[0]); + i8_alloc_mem_size -= i4_layer_wd_mbs * au4_ref_xpos_array_size[k] * + sizeof(pi4_ref_array_positions_x[0]); + i8_alloc_mem_size -= i4_layer_ht_mbs * au4_ref_ypos_array_size[k] * + sizeof(pi4_ref_array_positions_y[0]); + i8_alloc_mem_size -= + au4_ref_phase_array_size[k] * sizeof(ps_ref_array_phases[0]); + } + } + else + { + intra_pred_mb_state_t *ps_mb_state; + coordinates_t *ps_ref_array_phases; + + for(k = 0; k < NUM_SP_COMPONENTS; k++) + { + bool b_is_chroma = ((COMPONENT_TYPE) k) != Y; + + ps_mb_state = b_is_chroma ? ps_layer_state->ps_chroma_mb_states + : ps_layer_state->ps_luma_mb_states; + ps_ref_array_phases = b_is_chroma ? 
((coordinates_t *) pu1_buf) : NULL; + + for(l = 0; l < i4_num_mb_states; l++) + { + ps_mb_state[l].pi4_ref_array_positions_x = NULL; + ps_mb_state[l].pi4_ref_array_positions_y = NULL; + ps_mb_state[l].ps_ref_array_phases = ps_ref_array_phases; + } + } + + pu1_buf += au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]); + i8_alloc_mem_size -= + au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]); + } + + ps_layer_state->i4_mb_mode_stride = (i4_layer_luma_wd / MB_SIZE) + 2; + ps_layer_state->pi1_mb_mode = (WORD8 *) pu1_buf; + ps_layer_state->pi1_mb_mode += ps_layer_state->i4_mb_mode_stride + 1; /* skip top pad row and left pad MB */ + api4_mb_modes[i] = ps_layer_state->pi1_mb_mode; + pu1_buf += i4_layer_luma_mbs * sizeof(ps_layer_state->pi1_mb_mode[0]); + i8_alloc_mem_size -= + i4_layer_luma_mbs * sizeof(ps_layer_state->pi1_mb_mode[0]); /* budget must shrink by exactly what pu1_buf consumed */ + memset(ps_layer_state->pi1_mb_mode - ps_layer_state->i4_mb_mode_stride - 1, -1, i4_layer_luma_mbs); /* fill from the buffer base so the pad MBs are covered and the fill stays inside the buffer */ + + if(i > 0) + { + /* Asserts below verify that + * 'ps_codec->s_svc_ilp_data.aps_layer_resampler_props' is initialised + */ + ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][i].u4_mb_wd == + MB_SIZE); + ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][i].u4_mb_wd == + (MB_SIZE / 2)); + + ps_layer_state->ps_luma_props = + &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][i]; + ps_layer_state->ps_chroma_props = + &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][i]; + + isvce_ibl_layer_state_init( + ps_layer_state, d_spatial_res_ratio, i4_layer_luma_wd, i4_layer_luma_ht, + ps_codec->s_cfg.u4_max_level, ps_codec->s_cfg.e_inp_color_fmt); + } + else + { + ps_layer_state->ps_luma_props = NULL; + ps_layer_state->ps_chroma_props = NULL; + } + } + else + { + ps_layer_state->ps_luma_mb_states = aps_luma_mb_states[i]; + ps_layer_state->ps_chroma_mb_states = aps_chroma_mb_states[i]; + + ps_layer_state->i4_mb_mode_stride = (i4_layer_luma_wd / MB_SIZE) + 2; + ps_layer_state->pi1_mb_mode = api4_mb_modes[i]; + + if(i > 0) + { + ps_layer_state->ps_luma_props = + 
&ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][i]; + ps_layer_state->ps_chroma_props = + &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][i]; + } + else + { + ps_layer_state->ps_luma_props = NULL; + ps_layer_state->ps_chroma_props = NULL; + } + } + + ps_layer_state->pu1_refarray_buffer = (UWORD8 *) pu1_buf; + memset(ps_layer_state->pu1_refarray_buffer, 0, TEMP_BUF_SIZE_LUMA * sizeof(UWORD8)); + pu1_buf += TEMP_BUF_SIZE_LUMA * sizeof(UWORD8); + i8_alloc_mem_size -= TEMP_BUF_SIZE_LUMA * sizeof(UWORD8); + + ps_layer_state->pu1_refarray_cb = (UWORD8 *) pu1_buf; + memset(ps_layer_state->pu1_refarray_cb, 0, TEMP_BUF_SIZE_CB * sizeof(UWORD8)); + pu1_buf += TEMP_BUF_SIZE_CB * sizeof(UWORD8); + i8_alloc_mem_size -= TEMP_BUF_SIZE_CB * sizeof(UWORD8); + + ps_layer_state->pu1_refarray_cr = (UWORD8 *) pu1_buf; + memset(ps_layer_state->pu1_refarray_cr, 0, TEMP_BUF_SIZE_CR * sizeof(UWORD8)); + pu1_buf += TEMP_BUF_SIZE_CR * sizeof(UWORD8); + i8_alloc_mem_size -= TEMP_BUF_SIZE_CR * sizeof(UWORD8); + + ps_layer_state->pi4_temp_interpolation_buffer = (WORD32 *) pu1_buf; + pu1_buf += (TEMP_INTERPOLATION_BUF_SIZE * sizeof(WORD32)); + i8_alloc_mem_size -= (TEMP_INTERPOLATION_BUF_SIZE * sizeof(WORD32)); + + ASSERT(i8_alloc_mem_size >= 0); + } + } + + for(i = 0; i < i4_num_proc_ctxts; i++) + { + isvce_process_ctxt_t *ps_proc = &ps_codec->as_process[i]; + svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt; + yuv_buf_props_t *ps_mb_intra_pred_buf = + &ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf; + + ps_proc->ps_mb_pred_buf = ps_mb_intra_pred_buf; + + for(j = 0; j < NUM_SP_COMPONENTS; j++) + { + buffer_container_t *ps_comp_buf = &ps_mb_intra_pred_buf->as_component_bufs[j]; + + ps_comp_buf->pv_data = pu1_buf; + ps_comp_buf->i4_data_stride = MB_SIZE; + pu1_buf += MB_SIZE * MB_SIZE * sizeof(UWORD8); + i8_alloc_mem_size -= MB_SIZE * MB_SIZE * sizeof(WORD8); + + ASSERT(i8_alloc_mem_size >= 0); + } + + ps_mb_intra_pred_buf->as_component_bufs[V].pv_data = 
NULL; + ps_mb_intra_pred_buf->e_color_format = IV_YUV_420SP_UV; + ps_mb_intra_pred_buf->u1_bit_depth = 16; + ps_mb_intra_pred_buf->u4_width = MB_SIZE; + ps_mb_intra_pred_buf->u4_height = MB_SIZE; + } + } + else + { + for(i = 0; i < i4_num_proc_ctxts; i++) + { + isvce_process_ctxt_t *ps_proc = &ps_codec->as_process[i]; + + ps_proc->ps_intra_pred_ctxt = NULL; + } + } +} + +void isvce_intra_sampling_function_selector(intra_sampling_ctxt_t *ps_ctxt, + DOUBLE d_spatial_res_ratio, IV_ARCH_T e_arch) +{ + if(2. == d_spatial_res_ratio) + { + switch(e_arch) + { +#if defined(X86) + case ARCH_X86_SSE42: + { + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_horz_interpol_chroma = + isvc_horz_interpol_chroma_dyadic_sse42; + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_vert_interpol_chroma = + isvc_vert_interpol_chroma_dyadic_sse42; + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_interpolate_luma = + isvc_interpolate_base_luma_dyadic_sse42; + + break; + } +#elif defined(ARMV8) + case ARCH_ARM_A53: + case ARCH_ARM_A57: + case ARCH_ARM_V8_NEON: + { + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_horz_interpol_chroma = + isvc_horz_interpol_chroma_dyadic_neon; + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_vert_interpol_chroma = + isvc_vert_interpol_chroma_dyadic_neon; + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_interpolate_luma = + isvc_interpolate_base_luma_dyadic_neon; + + break; + } +#elif !defined(DISABLE_NEON) + case ARCH_ARM_A9Q: + case ARCH_ARM_A9A: + case ARCH_ARM_A9: + case ARCH_ARM_A7: + case ARCH_ARM_A5: + case ARCH_ARM_A15: + { + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_horz_interpol_chroma = + isvc_horz_interpol_chroma_dyadic_neon; + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_vert_interpol_chroma = + isvc_vert_interpol_chroma_dyadic_neon; + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_interpolate_luma = + isvc_interpolate_base_luma_dyadic_neon; + + break; + } +#endif + default: + { + 
ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_horz_interpol_chroma = + isvc_horz_interpol_chroma_dyadic; + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_vert_interpol_chroma = + isvc_vert_interpol_chroma_dyadic; + ps_ctxt->as_res_lyrs[ps_ctxt->i4_res_lyr_id].pf_interpolate_luma = + isvc_interpolate_base_luma_dyadic; + + break; + } + } + } +} + +static void isvce_get_mb_intra_pred(isvce_process_ctxt_t *ps_proc) +{ + mem_element_t s_ref_mb_mode; + mem_element_t s_inp_luma; + mem_element_t s_inp_chroma; + mem_element_t s_out_luma; + mem_element_t s_out_chroma; + + coordinates_t s_frame_dims; + coordinates_t s_frame_dims_in_mbs; + + WORD32 i4_cur_stride; + WORD32 i4_ref_stride; + WORD32 i; + + intra_sampling_ctxt_t s_intra_samp_ctxt[NUM_SP_COMPONENTS]; + isvce_codec_t *ps_codec = ps_proc->ps_codec; + svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt; + intra_pred_state_t *ps_intra_pred_state = + (intra_pred_state_t *) (ps_intra_pred_ctxt->s_intra_pred_constants.pv_state); + intra_pred_layer_state_t *ps_layer_state = + &ps_intra_pred_state->ps_layer_state[ps_proc->u1_spatial_layer_id]; + intra_pred_layer_state_t *ps_ref_layer_state = + &ps_intra_pred_state->ps_layer_state[ps_proc->u1_spatial_layer_id - 1]; + + intra_pred_mb_state_t *ps_luma_mb_state; + intra_pred_mb_state_t *ps_chroma_mb_state; + + coordinates_t *ps_mb_pos = &ps_intra_pred_ctxt->s_intra_pred_variables.s_mb_pos; + svc_ilp_data_t *ps_svc_ilp_data = ps_intra_pred_ctxt->s_intra_pred_variables.ps_svc_ilp_data; + + s_frame_dims.i4_abscissa = + ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id].u4_width; + s_frame_dims.i4_ordinate = + ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id].u4_height; + s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE; + s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE; + + ps_luma_mb_state = ps_layer_state->ps_luma_mb_states + ps_mb_pos->i4_abscissa + + ps_mb_pos->i4_ordinate * 
s_frame_dims_in_mbs.i4_abscissa; + ps_chroma_mb_state = ps_layer_state->ps_chroma_mb_states + ps_mb_pos->i4_abscissa + + ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa; + + for(i = 0; i < NUM_SP_COMPONENTS; i++) + { + UWORD32 u4_ref_wd, u4_ref_ht; + + bool b_is_chroma = (Y != ((COMPONENT_TYPE) i)); + mem_element_t *ps_buf = b_is_chroma ? &s_out_chroma : &s_out_luma; + intra_pred_mb_state_t *ps_mb_state = b_is_chroma ? ps_chroma_mb_state : ps_luma_mb_state; + layer_resampler_props_t *ps_layer_props = + b_is_chroma ? ps_layer_state->ps_chroma_props : ps_layer_state->ps_luma_props; + + s_intra_samp_ctxt[i].i4_res_lyr_id = ps_proc->u1_spatial_layer_id; + + s_intra_samp_ctxt[i].i4_refarray_stride = REF_ARRAY_WIDTH; + s_intra_samp_ctxt[i].i4_ref_width = + ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_width; + s_intra_samp_ctxt[i].i4_ref_height = + ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_height; + + isvce_intra_sampling_function_selector(&s_intra_samp_ctxt[i], + ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio, + ps_codec->s_cfg.e_arch); + + s_intra_samp_ctxt[i].pu1_refarray_buffer = ps_layer_state->pu1_refarray_buffer; + s_intra_samp_ctxt[i].pu1_refarray_cb = ps_layer_state->pu1_refarray_cb; + s_intra_samp_ctxt[i].pu1_refarray_cr = ps_layer_state->pu1_refarray_cr; + s_intra_samp_ctxt[i].pi4_temp_interpolation_buffer = + ps_layer_state->pi4_temp_interpolation_buffer; + + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].ps_mb_pos = ps_mb_pos; + + /* Phase is used only by chroma functions */ + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_x_phase_0 = + ps_chroma_mb_state->ps_ref_array_phases[0].i4_abscissa; + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_x_phase_1 = + ps_chroma_mb_state->ps_ref_array_phases[1].i4_abscissa; + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_y_phase_0 = + 
ps_chroma_mb_state->ps_ref_array_phases[0].i4_ordinate; + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_y_phase_1 = + ps_chroma_mb_state->ps_ref_array_phases[2].i4_ordinate; + s_intra_samp_ctxt[i] + .as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id] + .i1_constrained_intra_rsmpl_flag = 0; + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_ref_width = + ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_width; + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i4_ref_height = + ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_height; + + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i2_x_min_pos = + ps_mb_state->s_min_pos.i4_abscissa; + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i2_x_max_pos = + ps_mb_state->s_max_pos.i4_abscissa; + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i2_y_min_pos = + ps_mb_state->s_min_pos.i4_ordinate; + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].i2_y_max_pos = + ps_mb_state->s_max_pos.i4_ordinate; + + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].ps_phase = + ps_mb_state->ps_ref_array_phases; + + s_intra_samp_ctxt[i] + .as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id] + .pi4_ref_array_positions_x = ps_mb_state->pi4_ref_array_positions_x; + s_intra_samp_ctxt[i] + .as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id] + .pi4_ref_array_positions_y = ps_mb_state->pi4_ref_array_positions_y; + + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].ps_offsets = + &ps_mb_state->s_offsets; + + s_intra_samp_ctxt[i].as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id].ps_ref_array_dims = + &ps_mb_state->s_ref_array_dims; + + i4_cur_stride = + ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[i].i4_data_stride; + ps_buf->pv_buffer = + (UWORD8 *) 
(ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[i] + .pv_data); + + ps_buf->i4_element_size = 1; + ps_buf->i4_num_element_stride = i4_cur_stride; + + ps_buf = b_is_chroma ? &s_inp_chroma : &s_inp_luma; + + i4_ref_stride = ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1] + .as_component_bufs[i] + .i4_data_stride; + + u4_ref_wd = ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_width; + u4_ref_ht = + ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_height; + + /* For chroma, filteringModeFlag=1 */ + /* If filteringModeFlag=1, interpolation requires samples at an offset of -1 + * along both directions */ + if(ps_proc->s_svc_params.d_spatial_res_ratio == 2.0) + { + WORD8 i1_x_odd, i1_y_odd; + + ps_buf->pv_buffer = + (UWORD8 *) ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1] + .as_component_bufs[i] + .pv_data + + (ps_mb_state->s_offsets.i4_abscissa << b_is_chroma) + + ps_mb_state->s_offsets.i4_ordinate * i4_ref_stride; + + if(!b_is_chroma) + { + ps_buf->pv_buffer = ((UWORD8 *) ps_buf->pv_buffer) + -1 + -1 * i4_ref_stride; + } + + i1_x_odd = (ps_proc->i4_mb_x & 1); + i1_y_odd = (ps_proc->i4_mb_y & 1); + + if(i1_x_odd) + { + ps_buf->pv_buffer = (UWORD8 *) ps_buf->pv_buffer - 8; + } + if(i1_y_odd) + { + ps_buf->pv_buffer = + (UWORD8 *) ps_buf->pv_buffer - ((8 >> b_is_chroma) * i4_ref_stride); + } + } + else + { + WORD32 i4_horz_dim = 0; + WORD32 i4_vert_dim = 0; + WORD32 i4_dim = + (WORD32) (ps_mb_state->s_max_pos.i4_abscissa - ps_mb_state->s_min_pos.i4_abscissa) + + (4 >> b_is_chroma); + + if(i4_dim > i4_horz_dim) + { + i4_horz_dim = i4_dim; + } + + i4_dim = + (WORD32) (ps_mb_state->s_max_pos.i4_ordinate - ps_mb_state->s_min_pos.i4_ordinate) + + (4 >> b_is_chroma); + + if(i4_dim > i4_vert_dim) + { + i4_vert_dim = i4_dim; + } + + isvc_intra_resamp_generate_segment_lookup( + &(s_intra_samp_ctxt[i] + .as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id] + 
.as_seg_lookup_horz[0]), + i4_horz_dim, ps_layer_props->u4_mb_wd, 3); + + isvc_intra_resamp_generate_segment_lookup( + &(s_intra_samp_ctxt[i] + .as_res_lyrs[s_intra_samp_ctxt[i].i4_res_lyr_id] + .as_seg_lookup_vert[0]), + i4_vert_dim, ps_layer_props->u4_mb_ht, 4); + + ps_buf->pv_buffer = + (UWORD8 *) ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1] + .as_component_bufs[i] + .pv_data + + (CLIP3(0, (WORD32) u4_ref_wd - 1, ps_mb_state->s_offsets.i4_abscissa) + << b_is_chroma) + + CLIP3(0, (WORD32) u4_ref_ht - 1, ps_mb_state->s_offsets.i4_ordinate) * + i4_ref_stride; + } + + ps_buf->i4_element_size = 1; + ps_buf->i4_num_element_stride = i4_ref_stride; + } + + s_ref_mb_mode.i4_element_size = 1; + s_ref_mb_mode.i4_num_element_stride = + (ps_svc_ilp_data->ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id - 1].u4_width >> 4) + 2; + s_ref_mb_mode.pv_buffer = ps_ref_layer_state->pi1_mb_mode; + + if(ps_proc->s_svc_params.d_spatial_res_ratio == 2.0) + { + isvc_intra_samp_mb_dyadic(&s_intra_samp_ctxt[Y], &s_inp_luma, &s_inp_chroma, &s_ref_mb_mode, + &s_out_luma, &s_out_chroma, ps_proc->i4_mb_x, ps_proc->i4_mb_y, 0, + 0); + } + else + { + isvc_intra_samp_mb(&s_intra_samp_ctxt[Y], &s_intra_samp_ctxt[UV], &s_inp_luma, + &s_inp_chroma, &s_ref_mb_mode, &s_out_luma, &s_out_chroma); + } +} + +static FORCEINLINE void isvce_get_sad(UWORD8 *pu1_src, UWORD8 *pu1_pred, UWORD32 src_strd, + UWORD32 pred_strd, WORD32 *pi4_distortion, UWORD32 u4_width, + UWORD32 u4_height) +{ + UWORD32 i, j; /* i walks rows (one stride step each), j walks samples within a row */ + *pi4_distortion = 0; /* SAD of the u4_width x u4_height block is accumulated here */ + for(i = 0; i < u4_height; i++) + { + for(j = 0; j < u4_width; j++) + { + *pi4_distortion += ABS(pu1_src[j] - pu1_pred[j]); + } + pu1_src += src_strd; + pu1_pred += pred_strd; + } +} + +/** +****************************************************************************** +* +* @brief +* evaluate IBL mode +* +* @par Description +* This function evaluates IBL mode for the macro-block +* +* @param[in] ps_proc +* pointer to proc ctxt +* +* @return none +* 
+****************************************************************************** +*/ +void isvce_evaluate_IBL_mode(isvce_process_ctxt_t *ps_proc) +{ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt; + + /* SAD(distortion metric) of a block */ + WORD32 i4_mb_distortion_least = INT_MAX; + + /* cost = distortion + lambda*rate */ + WORD32 i4_mb_cost_least = INT_MAX; + + WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[Y].i4_data_stride; + + UWORD8 *pu1_mb_src = (UWORD8 *) (ps_proc->s_src_buf_props.as_component_bufs[Y].pv_data); + + WORD32 u4_cur_stride = + ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y].i4_data_stride; + + UWORD8 *pu1_mb_pred = + (UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y] + .pv_data); + + ps_intra_pred_ctxt->s_intra_pred_variables.ps_svc_ilp_data = &ps_codec->s_svc_ilp_data; + ps_intra_pred_ctxt->s_intra_pred_variables.s_mb_pos.i4_abscissa = ps_proc->i4_mb_x; + ps_intra_pred_ctxt->s_intra_pred_variables.s_mb_pos.i4_ordinate = ps_proc->i4_mb_y; + ps_intra_pred_ctxt->s_intra_pred_variables.u1_spatial_layer_id = ps_proc->u1_spatial_layer_id; + + isvce_get_mb_intra_pred(ps_proc); + + /* Luma cost */ + isvce_get_sad(pu1_mb_src, pu1_mb_pred, i4_src_strd, u4_cur_stride, &i4_mb_distortion_least, + ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.u4_width, + ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.u4_height); + + /* cost = distortion + lambda*rate */ + i4_mb_cost_least = i4_mb_distortion_least; + + /* update the type of the mb if necessary */ + if(i4_mb_cost_least < ps_proc->i4_mb_cost) + { + ps_proc->i4_mb_cost = i4_mb_cost_least; + ps_proc->i4_mb_distortion = i4_mb_distortion_least; + ps_proc->ps_mb_info->i4_mb_distortion = i4_mb_distortion_least; + ps_proc->ps_mb_info->u2_mb_type = BASE_MODE; + ps_proc->ps_mb_info->u1_base_mode_flag = 1; + ps_proc->ps_mb_info->u1_is_intra = 1; + } + else 
if(ps_proc->ps_mb_info->u2_mb_type != BASE_MODE) + { + ps_proc->ps_mb_info->u1_base_mode_flag = 0; + } +} + +void isvce_update_ibl_info(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt, UWORD8 u1_num_spatial_layers, + UWORD8 u1_spatial_layer_id, UWORD16 u2_mb_type, WORD32 i4_mb_x, + WORD32 i4_mb_y, WORD8 u1_base_mode_flag) +{ + if(u1_num_spatial_layers > 1) /* nothing is recorded for single-layer streams */ + { + intra_pred_state_t *ps_intra_pred_state = + (intra_pred_state_t *) (ps_intra_pred_ctxt->s_intra_pred_constants.pv_state); + intra_pred_layer_state_t *ps_layer_state = + &ps_intra_pred_state->ps_layer_state[u1_spatial_layer_id]; + WORD8 i1_is_intra = (u2_mb_type == I4x4 || u2_mb_type == I16x16 || u2_mb_type == I8x8); + + WORD8 *pi1_mb_mode = + &ps_layer_state->pi1_mb_mode[i4_mb_x + (i4_mb_y * (ps_layer_state->i4_mb_mode_stride))]; + + if(u1_base_mode_flag == 1) /* base-mode flag takes precedence over u2_mb_type */ + { + *pi1_mb_mode = SVC_IBL_MB; + } + else + { + if(i1_is_intra) + { + *pi1_mb_mode = SVC_INTRA_MB; + } + else + { + *pi1_mb_mode = SVC_INTER_MB; + } + } + } +} + +void isvce_pad_mb_mode_buf(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt, UWORD8 u1_spatial_layer_id, + UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, + UWORD32 u4_ht) +{ + if(u1_num_spatial_layers > 1) + { + intra_pred_state_t *ps_intra_pred_state = + (intra_pred_state_t *) (ps_intra_pred_ctxt->s_intra_pred_constants.pv_state); + intra_pred_layer_state_t *ps_layer_state = + &ps_intra_pred_state->ps_layer_state[u1_spatial_layer_id]; + + WORD32 i4_layer_luma_wd = + ((DOUBLE) u4_wd / + pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - u1_spatial_layer_id)) + + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) u4_ht / + pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - u1_spatial_layer_id)) + + 0.99; + + WORD32 row, src_strd; + WORD8 *pu1_src; + + WORD8 *pi1_mb_mode = ps_layer_state->pi1_mb_mode; + WORD32 i4_mb_mode_stride = ps_layer_state->i4_mb_mode_stride; + + /* Convert the layer dimensions from pixels to MB units */ + i4_layer_luma_wd /= MB_SIZE; + i4_layer_luma_ht /= MB_SIZE; + + 
if(d_spatial_res_ratio == 2.0) + { + UWORD8 *pu1_mb_mode = (UWORD8 *) pi1_mb_mode; + /* Pad left */ + ih264_pad_left_luma(pu1_mb_mode, i4_mb_mode_stride, i4_layer_luma_ht, 1); + + /* Pad right */ + ih264_pad_right_luma(pu1_mb_mode + i4_layer_luma_wd, i4_mb_mode_stride, + i4_layer_luma_ht, 1); + + /* Pad top */ + ih264_pad_top(pu1_mb_mode - 1, i4_mb_mode_stride, i4_layer_luma_wd + 2, 1); + + /* Pad bottom */ + ih264_pad_bottom(pu1_mb_mode + (i4_layer_luma_ht * i4_mb_mode_stride) - 1, + i4_mb_mode_stride, i4_layer_luma_wd + 2, 1); + } + else + { + /* Pad left */ + pu1_src = pi1_mb_mode; + src_strd = i4_mb_mode_stride; + for(row = 0; row < i4_layer_luma_ht; row++) + { + memset(pu1_src - 1, -1, 1); + pu1_src += src_strd; + } + + /* Pad right */ + pu1_src = pi1_mb_mode + i4_layer_luma_wd; + for(row = 0; row < i4_layer_luma_ht; row++) + { + memset(pu1_src, -1, 1); + pu1_src += src_strd; + } + + /* Pad top */ + pu1_src = pi1_mb_mode - 1; + memset(pu1_src - src_strd, -1, i4_layer_luma_wd + 2); + + /* Pad bottom */ + pu1_src = pi1_mb_mode + (i4_layer_luma_ht * i4_mb_mode_stride) - 1; + memset(pu1_src, -1, i4_layer_luma_wd + 2); + } + } +} diff --git a/encoder/svc/isvce_ibl_eval.h b/encoder/svc/isvce_ibl_eval.h new file mode 100644 index 0000000..b214b3d --- /dev/null +++ b/encoder/svc/isvce_ibl_eval.h @@ -0,0 +1,105 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_ibl_eval.h +* +* @brief +* Contains declarations for the functions defined in +* isvce_ibl_eval.c +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _ISVCE_IBL_EVAL_H_ +#define _ISVCE_IBL_EVAL_H_ + +#include "ih264_typedefs.h" +#include "isvc_macros.h" +#include "ih264_debug.h" +#include "isvc_defs.h" +#include "isvc_structs.h" +#include "isvc_intra_resample.h" +#include "isvce_structs.h" +#include "isvce_structs.h" + +#define TEMP_BUF_SIZE_LUMA (REF_ARRAY_WIDTH * REF_ARRAY_WIDTH) +#define TEMP_BUF_SIZE_CB (REF_ARRAY_WIDTH * REF_ARRAY_WIDTH) +#define TEMP_BUF_SIZE_CR (DYADIC_REF_W_C * DYADIC_REF_H_C) + +#define INTERMEDIATE_BUFF_WIDTH 48 +#define INTERMEDIATE_BUFF_HEIGHT (MB_SIZE + 4) +#define TEMP_INTERPOLATION_BUF_SIZE (INTERMEDIATE_BUFF_WIDTH * INTERMEDIATE_BUFF_HEIGHT) + +/* Structs */ +typedef struct intra_pred_constants_t +{ + void *pv_state; +} intra_pred_constants_t; + +typedef struct intra_pred_outputs_t +{ + yuv_buf_props_t s_pred_buf; +} intra_pred_outputs_t; + +typedef struct intra_pred_variables_t +{ + svc_ilp_data_t *ps_svc_ilp_data; + + coordinates_t s_mb_pos; + + UWORD8 u1_spatial_layer_id; +} intra_pred_variables_t; + +typedef struct svc_intra_pred_ctxt_t +{ + intra_pred_constants_t s_intra_pred_constants; + + intra_pred_variables_t s_intra_pred_variables; + + intra_pred_outputs_t s_intra_pred_outputs; + +} svc_intra_pred_ctxt_t; + +extern UWORD32 isvce_get_svc_intra_pred_ctxt_size(UWORD8 u1_num_spatial_layers, + DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, + UWORD32 u4_ht); + 
+extern void isvce_intra_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec); + +extern void isvce_update_ibl_info(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt, + UWORD8 u1_num_spatial_layers, UWORD8 u1_spatial_layer_id, + UWORD16 u2_mb_type, WORD32 i4_mb_x, WORD32 i4_mb_y, + WORD8 u1_base_mode_flag); + +extern void isvce_evaluate_IBL_mode(isvce_process_ctxt_t *ps_proc); + +extern void isvce_pad_mb_mode_buf(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt, + UWORD8 u1_spatial_layer_id, UWORD8 u1_num_spatial_layers, + DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht); + +#endif diff --git a/encoder/svc/isvce_ibl_private_defs.h b/encoder/svc/isvce_ibl_private_defs.h new file mode 100644 index 0000000..7bd8169 --- /dev/null +++ b/encoder/svc/isvce_ibl_private_defs.h @@ -0,0 +1,94 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_ibl_private_defs.h
+*
+* @brief
+*  Contains datatype and macro definitions used exclusively in
+*  intra base layer (IBL) prediction
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_IBL_PRIVATE_DEFS_H_
+#define _ISVCE_IBL_PRIVATE_DEFS_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvce_structs.h"
+#include "isvc_intra_resample.h"
+
+/* Structs */
+/* Per-MB reference array geometry (offsets, dimensions, positions, phases) */
+typedef struct intra_pred_mb_state_t
+{
+    coordinates_t s_offsets;
+
+    coordinates_t s_ref_array_dims;
+
+    WORD32 *pi4_ref_array_positions_x;
+
+    WORD32 *pi4_ref_array_positions_y;
+
+    coordinates_t *ps_ref_array_phases;
+
+    coordinates_t s_min_pos;
+
+    coordinates_t s_max_pos;
+
+} intra_pred_mb_state_t;
+
+/* Per-layer state: resampler properties, per-MB states and scratch buffers */
+typedef struct intra_pred_layer_state_t
+{
+    layer_resampler_props_t *ps_luma_props;
+
+    layer_resampler_props_t *ps_chroma_props;
+
+    intra_pred_mb_state_t *ps_luma_mb_states;
+
+    intra_pred_mb_state_t *ps_chroma_mb_states;
+
+    WORD8 *pi1_mb_mode;
+
+    WORD32 i4_mb_mode_stride;
+
+    /* buffer to store the reference
+    layer data before intra sampling */
+    UWORD8 *pu1_refarray_buffer;
+
+    UWORD8 *pu1_refarray_cb;
+
+    UWORD8 *pu1_refarray_cr;
+
+    WORD32 *pi4_temp_interpolation_buffer;
+
+} intra_pred_layer_state_t;
+
+typedef struct intra_pred_state_t
+{
+    /* Array of size numSpatialLayers */
+    intra_pred_layer_state_t *ps_layer_state;
+
+} intra_pred_state_t;
+
+#endif
diff --git a/encoder/svc/isvce_ilp_mv.c b/encoder/svc/isvce_ilp_mv.c
new file mode 100644
index 0000000..9aa45a3
--- /dev/null
+++ b/encoder/svc/isvce_ilp_mv.c
@@ -0,0 +1,737 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in
compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_ilp_mv.c +* +* @brief +* Contains functions used for deriving inter_layer MV's +* +******************************************************************************* +*/ +#include +#include +#include + +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "isvc_macros.h" +#include "isvc_defs.h" +#include "isvce_defs.h" +#include "isvce_structs.h" +#include "isvce_ilp_mv_private_defs.h" +#include "isvce_ilp_mv.h" +#include "isvce_ilp_mv_utils.h" + +/** +******************************************************************************* +* +* @brief +* Returns size of buffers for storing ILP MV ctxt +* +* @param[in] u1_num_spatial_layers +* Num Spatial Layers +* +* @param[in] d_spatial_res_ratio +* Resolution Ratio b/w spatial layers +* +* @param[in] u4_wd +* Input Width +* +* @param[in] u4_ht +* Input Height +* +* @returns Size of buffers +* +******************************************************************************* +*/ +UWORD32 isvce_get_ilp_mv_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, + UWORD32 u4_wd, UWORD32 u4_ht) +{ + UWORD32 u4_size = 0; + + if(u1_num_spatial_layers > 1) + { + WORD32 i; + + u4_size += MAX_PROCESS_CTXT * sizeof(svc_ilp_mv_ctxt_t); + u4_size += MAX_PROCESS_CTXT * sizeof(ilp_mv_state_t); + + 
u4_size += u1_num_spatial_layers * sizeof(ilp_mv_layer_state_t); + + for(i = u1_num_spatial_layers - 1; i >= 1; i--) + { + WORD32 i4_layer_luma_wd = + (WORD32) ((DOUBLE) u4_wd / + pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; + WORD32 i4_layer_luma_mbs = (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE); + + u4_size += i4_layer_luma_mbs * sizeof(ilp_mv_mb_state_t); + } + } + + return u4_size; +} + +static FORCEINLINE void isvce_ref_layer_pu_and_mb_pos_init(layer_resampler_props_t *ps_layer_props, + ilp_mv_mb_state_t *ps_mb_state, + coordinates_t *ps_mb_pos, + UWORD32 u4_ref_wd, UWORD32 u4_ref_ht, + UWORD8 u1_field_pic_flag, + UWORD8 u1_field_mb_flag) +{ + UWORD32 i, j; + + coordinates_t(*aps_pu_positions)[MAX_PU_IN_MB_ROW] = ps_mb_state->as_pu_positions; + coordinates_t(*aps_mb_positions)[MAX_PU_IN_MB_ROW] = ps_mb_state->as_mb_positions; + + for(i = 0; i < MAX_PU_IN_MB_COL; i++) + { + UWORD32 u4_y_ref16; + + UWORD32 u4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + + (4 * i + 1) * (1 + u1_field_mb_flag - u1_field_pic_flag); + + u4_y_ref16 = + (u4_yc * ps_layer_props->u4_scale_y + (1 << (ps_layer_props->u4_shift_y - 1))) >> + ps_layer_props->u4_shift_y; + u4_y_ref16 = MIN(u4_y_ref16, u4_ref_ht - 1); + + for(j = 0; j < MAX_PU_IN_MB_ROW; j++) + { + UWORD32 u4_x_ref16; + + UWORD32 u4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + 4 * j + 1; + + u4_x_ref16 = + (u4_xc * ps_layer_props->u4_scale_x + (1 << (ps_layer_props->u4_shift_x - 1))) >> + ps_layer_props->u4_shift_x; + u4_x_ref16 = MIN(u4_x_ref16, u4_ref_wd - 1); + + aps_pu_positions[i][j].i4_abscissa = u4_x_ref16; + aps_pu_positions[i][j].i4_ordinate = u4_y_ref16; + + aps_mb_positions[i][j].i4_abscissa = (u4_x_ref16 / MB_SIZE); + aps_mb_positions[i][j].i4_ordinate = (u4_y_ref16 / MB_SIZE); + } + } +} + +static void 
isvce_ilp_mv_layer_state_init(ilp_mv_layer_state_t *ps_layer_state, + DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht) +{ + UWORD32 i, j; + + const UWORD8 u1_ref_layer_field_pic_flag = 0; + const UWORD8 u1_field_pic_flag = 0; + const UWORD8 u1_field_mb_flag = 0; + + ilp_mv_mb_state_t *ps_mb_states; + layer_resampler_props_t *ps_layer_props; + + UWORD32 u4_wd_in_mbs; + UWORD32 u4_ht_in_mbs; + + UWORD32 u4_ref_wd = (u4_wd / d_spatial_res_ratio); + UWORD32 u4_ref_ht = (u4_ht / d_spatial_res_ratio) * (1 + u1_ref_layer_field_pic_flag); + UWORD32 u4_scaled_wd = u4_wd; + UWORD32 u4_scaled_ht = u4_ht * (1 + u1_field_pic_flag); + + ps_mb_states = ps_layer_state->ps_mb_states; + ps_layer_props = ps_layer_state->ps_props; + + u4_wd_in_mbs = u4_scaled_wd / ps_layer_props->u4_mb_wd; + u4_ht_in_mbs = u4_scaled_ht / ps_layer_props->u4_mb_ht; + + ps_layer_state->s_mv_scale.i4_abscissa = ((u4_scaled_wd << 16) + (u4_ref_wd >> 1)) / u4_ref_wd; + ps_layer_state->s_mv_scale.i4_ordinate = ((u4_scaled_ht << 16) + (u4_ref_ht >> 1)) / u4_ref_ht; + + for(i = 0; i < u4_ht_in_mbs; i++) + { + for(j = 0; j < u4_wd_in_mbs; j++) + { + coordinates_t s_mb_pos = {j, i}; + + isvce_ref_layer_pu_and_mb_pos_init(ps_layer_props, &ps_mb_states[j + i * u4_wd_in_mbs], + &s_mb_pos, u4_ref_wd, u4_ref_ht, u1_field_pic_flag, + u1_field_mb_flag); + } + } +} + +/** +******************************************************************************* +* +* @brief +* Function to initialize svc ilp buffers +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_mem_rec +* Pointer to memory allocated for input buffers +* +******************************************************************************* +*/ +void isvce_ilp_mv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec) +{ + WORD32 i, j; + + const WORD32 i4_num_proc_ctxts = sizeof(ps_codec->as_process) / sizeof(ps_codec->as_process[0]); + UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; + + 
if(u1_num_spatial_layers > 1) + { + ilp_mv_layer_state_t *ps_layer_states; + ilp_mv_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS]; + + DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio; + UWORD32 u4_wd = ps_codec->s_cfg.u4_wd; + UWORD32 u4_ht = ps_codec->s_cfg.u4_ht; + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + WORD64 i8_alloc_mem_size = + isvce_get_ilp_mv_ctxt_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht); + + for(i = 0; i < i4_num_proc_ctxts; i++) + { + ilp_mv_state_t *ps_ilp_mv_state; + svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt; + + isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i; + + ps_ilp_mv_ctxt = ps_proc->ps_svc_ilp_mv_ctxt = (svc_ilp_mv_ctxt_t *) pu1_buf; + pu1_buf += sizeof(svc_ilp_mv_ctxt_t); + i8_alloc_mem_size -= sizeof(svc_ilp_mv_ctxt_t); + + ps_ilp_mv_ctxt->s_ilp_mv_constants.pv_state = pu1_buf; + ps_ilp_mv_state = (ilp_mv_state_t *) pu1_buf; + pu1_buf += sizeof(ilp_mv_state_t); + i8_alloc_mem_size -= sizeof(ilp_mv_state_t); + + if(0 == i) + { + ps_ilp_mv_state->ps_layer_state = (ilp_mv_layer_state_t *) pu1_buf; + ps_layer_states = ps_ilp_mv_state->ps_layer_state; + pu1_buf += u1_num_spatial_layers * sizeof(ps_ilp_mv_state->ps_layer_state[0]); + i8_alloc_mem_size -= + u1_num_spatial_layers * sizeof(ps_ilp_mv_state->ps_layer_state[0]); + } + else + { + ps_ilp_mv_state->ps_layer_state = ps_layer_states; + } + + ASSERT(i8_alloc_mem_size >= 0); + + if(0 == i) + { + for(j = u1_num_spatial_layers - 1; j >= 1; j--) + { + ilp_mv_layer_state_t *ps_layer = &ps_ilp_mv_state->ps_layer_state[j]; + + WORD32 i4_layer_luma_wd = + ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + + 0.99; + WORD32 i4_layer_luma_mbs = + (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE); + + ps_layer->ps_mb_states = (ilp_mv_mb_state_t *) pu1_buf; + aps_luma_mb_states[j] = 
ps_layer->ps_mb_states; + pu1_buf += i4_layer_luma_mbs * sizeof(ps_layer->ps_mb_states[0]); + i8_alloc_mem_size -= u1_num_spatial_layers * sizeof(ps_layer->ps_mb_states[0]); + + ASSERT(i8_alloc_mem_size >= 0); + /* Asserts below verify that + * 'ps_codec->s_svc_ilp_data.aps_layer_resampler_props' is initialised + */ + ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j].u4_mb_wd == + MB_SIZE); + + ps_layer->ps_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j]; + + isvce_ilp_mv_layer_state_init(ps_layer, d_spatial_res_ratio, i4_layer_luma_wd, + i4_layer_luma_ht); + } + } + else + { + for(j = u1_num_spatial_layers - 1; j >= 1; j--) + { + ilp_mv_layer_state_t *ps_layer = &ps_ilp_mv_state->ps_layer_state[j]; + + ps_layer->ps_mb_states = aps_luma_mb_states[j]; + + ps_layer->ps_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j]; + } + } + } + } + else + { + for(i = 0; i < i4_num_proc_ctxts; i++) + { + ps_codec->as_process[i].ps_svc_ilp_mv_ctxt = NULL; + } + } +} + +static void isvce_get_ilp_mvs_for_me(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt) +{ + svc_layer_data_t *ps_ref_layer_data; + ilp_mv_layer_state_t *ps_layer_state; + ilp_mv_mb_state_t *ps_mb_state; + isvce_mb_info_t *ps_ref_mb_info; + coordinates_t s_frame_dims; + coordinates_t s_frame_dims_in_mbs; + coordinates_t s_ref_frame_dims; + coordinates_t s_ref_frame_dims_in_mbs; + + bool b_is_mv_non_identical; + WORD32 i, j, k; + + ilp_mv_constants_t *ps_ilp_mv_constants = &ps_ilp_mv_ctxt->s_ilp_mv_constants; + ilp_mv_variables_t *ps_ilp_mv_variables = &ps_ilp_mv_ctxt->s_ilp_mv_variables; + ilp_mv_outputs_t *ps_ilp_mv_outputs = &ps_ilp_mv_ctxt->s_ilp_mv_outputs; + ilp_mv_state_t *ps_ilp_mv_state = (ilp_mv_state_t *) ps_ilp_mv_constants->pv_state; + svc_ilp_data_t *ps_svc_ilp_data = ps_ilp_mv_variables->ps_svc_ilp_data; + svc_au_data_t *ps_svc_au_data = ps_svc_ilp_data->ps_svc_au_data; + coordinates_t *ps_mb_pos = &ps_ilp_mv_variables->s_mb_pos; + const isvce_enc_pu_mv_t s_default_mv = 
{{0, 0}, -1}; + + UWORD8 u1_spatial_layer_id = ps_ilp_mv_variables->u1_spatial_layer_id; + WORD32 i4_num_ilp_mvs = 0; + + s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width; + s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height; + s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE; + s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE; + s_ref_frame_dims.i4_abscissa = + ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_width; + s_ref_frame_dims.i4_ordinate = + ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_height; + s_ref_frame_dims_in_mbs.i4_abscissa = s_ref_frame_dims.i4_abscissa / MB_SIZE; + s_ref_frame_dims_in_mbs.i4_ordinate = s_ref_frame_dims.i4_ordinate / MB_SIZE; + + ps_ref_layer_data = &ps_svc_au_data->ps_svc_layer_data[u1_spatial_layer_id - 1]; + ps_layer_state = &ps_ilp_mv_state->ps_layer_state[u1_spatial_layer_id]; + ps_mb_state = + &ps_layer_state->ps_mb_states[ps_mb_pos->i4_abscissa + + ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa]; + + for(i = 0; i < MAX_PU_IN_MB_COL; i++) + { + for(j = 0; j < MAX_PU_IN_MB_ROW; j++) + { + b_is_mv_non_identical = true; + + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] = s_default_mv; + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] = s_default_mv; + + ps_ref_mb_info = + &ps_ref_layer_data->ps_mb_info[ps_mb_state->as_mb_positions[i][j].i4_abscissa + + ps_mb_state->as_mb_positions[i][j].i4_ordinate * + s_ref_frame_dims_in_mbs.i4_abscissa]; + + if((ps_ref_mb_info->u2_mb_type == P16x16) || (ps_ref_mb_info->u2_mb_type == B16x16)) + { + ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] = + ps_ref_mb_info->u2_mb_type; + + ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] = + ps_ref_mb_info->as_pu->u1_pred_mode; + + if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L0) + { + 
ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] = + ps_ref_mb_info->as_pu->as_me_info[L1]; + + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx = + (ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx * + ps_layer_state->s_mv_scale.i4_abscissa + + 32768) >> + 16; + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy = + (ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy * + ps_layer_state->s_mv_scale.i4_ordinate + + 32768) >> + 16; + } + + if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L1) + { + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] = + ps_ref_mb_info->as_pu->as_me_info[L0]; + + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx = + (ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx * + ps_layer_state->s_mv_scale.i4_abscissa + + 32768) >> + 16; + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy = + (ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy * + ps_layer_state->s_mv_scale.i4_ordinate + + 32768) >> + 16; + } + + if(i4_num_ilp_mvs == 0) + { + i4_num_ilp_mvs++; + } + else + { + for(k = i4_num_ilp_mvs - 1; k >= 0; k--) + { + if((ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[k] == + ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs]) && + (ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k] == + ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs]) && + isvce_check_identical_mv( + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[k], + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs], + ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k])) + { + b_is_mv_non_identical = false; + } + } + + if(b_is_mv_non_identical) + { + i4_num_ilp_mvs++; + } + } + } + else + { + ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] = INVALID_MB_TYPE; + } + } + } + + ps_ilp_mv_outputs->s_ilp_me_cands.u4_num_ilp_mvs = i4_num_ilp_mvs; + + 
for(i = 0; i < MAX_ILP_MV_IN_NBR_RGN; i++) + { + b_is_mv_non_identical = true; + + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] = s_default_mv; + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] = s_default_mv; + + if(ps_mb_pos->i4_abscissa + gai1_nbr_ilp_mv_map[i][0] >= 0 && + ps_mb_pos->i4_abscissa + gai1_nbr_ilp_mv_map[i][0] < s_frame_dims_in_mbs.i4_abscissa && + ps_mb_pos->i4_ordinate + gai1_nbr_ilp_mv_map[i][1] >= 0 && + ps_mb_pos->i4_ordinate + gai1_nbr_ilp_mv_map[i][1] < s_frame_dims_in_mbs.i4_ordinate) + { + ps_mb_state = + &ps_layer_state->ps_mb_states[(ps_mb_pos->i4_abscissa + gai1_nbr_ilp_mv_map[i][0]) + + (ps_mb_pos->i4_ordinate + gai1_nbr_ilp_mv_map[i][1]) * + s_frame_dims_in_mbs.i4_abscissa]; + + ps_ref_mb_info = + &ps_ref_layer_data->ps_mb_info[(ps_mb_state + ->as_mb_positions[gai1_nbr_ilp_mv_map[i][2]] + [gai1_nbr_ilp_mv_map[i][3]] + .i4_abscissa) + + ps_mb_state + ->as_mb_positions[gai1_nbr_ilp_mv_map[i][2]] + [gai1_nbr_ilp_mv_map[i][3]] + .i4_ordinate * + s_ref_frame_dims_in_mbs.i4_abscissa]; + + if((ps_ref_mb_info->u2_mb_type == P16x16) || (ps_ref_mb_info->u2_mb_type == B16x16)) + { + ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] = + ps_ref_mb_info->u2_mb_type; + + ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] = + ps_ref_mb_info->as_pu->u1_pred_mode; + + if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L0) + { + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1] = + ps_ref_mb_info->as_pu->as_me_info[L1]; + + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx = + (ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvx * + ps_layer_state->s_mv_scale.i4_abscissa + + 32768) >> + 16; + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy = + (ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L1].s_mv.i2_mvy * + ps_layer_state->s_mv_scale.i4_ordinate + + 32768) >> + 16; + } + + 
if(ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs] != L1) + { + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0] = + ps_ref_mb_info->as_pu->as_me_info[L0]; + + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx = + (ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvx * + ps_layer_state->s_mv_scale.i4_abscissa + + 32768) >> + 16; + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy = + (ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs][L0].s_mv.i2_mvy * + ps_layer_state->s_mv_scale.i4_ordinate + + 32768) >> + 16; + } + + if(i4_num_ilp_mvs == 0) + { + i4_num_ilp_mvs++; + } + else + { + for(k = i4_num_ilp_mvs - 1; k >= 0; k--) + { + if((ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[k] == + ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs]) && + (ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k] == + ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[i4_num_ilp_mvs]) && + isvce_check_identical_mv( + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[k], + ps_ilp_mv_outputs->s_ilp_me_cands.as_mv[i4_num_ilp_mvs], + ps_ilp_mv_outputs->s_ilp_me_cands.ae_pred_mode[k])) + b_is_mv_non_identical = false; + } + + if(b_is_mv_non_identical) + { + i4_num_ilp_mvs++; + } + } + } + else + { + ps_ilp_mv_outputs->s_ilp_me_cands.e_mb_type[i4_num_ilp_mvs] = INVALID_MB_TYPE; + } + } + } + + ps_ilp_mv_outputs->s_ilp_me_cands.u4_num_ilp_mvs_incl_nbrs = i4_num_ilp_mvs; +} + +void isvce_get_mb_ilp_mv(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt) +{ + svc_layer_data_t *ps_ref_layer_data; + ilp_mv_layer_state_t *ps_layer_state; + ilp_mv_mb_state_t *ps_mb_state; + isvce_mb_info_t *ps_ref_mb_info; + coordinates_t s_frame_dims; + coordinates_t s_frame_dims_in_mbs; + coordinates_t s_ref_frame_dims; + coordinates_t s_ref_frame_dims_in_mbs; + + WORD32 i, j; + + ilp_mv_constants_t *ps_ilp_mv_constants = &ps_ilp_mv_ctxt->s_ilp_mv_constants; + ilp_mv_variables_t *ps_ilp_mv_variables = &ps_ilp_mv_ctxt->s_ilp_mv_variables; + 
ilp_mv_outputs_t *ps_ilp_mv_outputs = &ps_ilp_mv_ctxt->s_ilp_mv_outputs; + ilp_mv_state_t *ps_ilp_mv_state = (ilp_mv_state_t *) ps_ilp_mv_constants->pv_state; + svc_ilp_data_t *ps_svc_ilp_data = ps_ilp_mv_variables->ps_svc_ilp_data; + svc_au_data_t *ps_svc_au_data = ps_svc_ilp_data->ps_svc_au_data; + coordinates_t *ps_mb_pos = &ps_ilp_mv_variables->s_mb_pos; + const isvce_enc_pu_mv_t s_default_mv = {{0, 0}, -1}; + + UWORD8 u1_spatial_layer_id = ps_ilp_mv_variables->u1_spatial_layer_id; + + s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width; + s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height; + s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE; + s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE; + s_ref_frame_dims.i4_abscissa = + ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_width; + s_ref_frame_dims.i4_ordinate = + ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_height; + s_ref_frame_dims_in_mbs.i4_abscissa = s_ref_frame_dims.i4_abscissa / MB_SIZE; + s_ref_frame_dims_in_mbs.i4_ordinate = s_ref_frame_dims.i4_ordinate / MB_SIZE; + + ps_ref_layer_data = &ps_svc_au_data->ps_svc_layer_data[u1_spatial_layer_id - 1]; + ps_layer_state = &ps_ilp_mv_state->ps_layer_state[u1_spatial_layer_id]; + ps_mb_state = + &ps_layer_state->ps_mb_states[ps_mb_pos->i4_abscissa + + ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa]; + + ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L0] = s_default_mv; + ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L1] = s_default_mv; + + ps_ref_mb_info = &ps_ref_layer_data->ps_mb_info[ps_mb_state->as_mb_positions[0][0].i4_abscissa + + ps_mb_state->as_mb_positions[0][0].i4_ordinate * + s_ref_frame_dims_in_mbs.i4_abscissa]; + + if((ps_ref_mb_info->u2_mb_type == P16x16) || (ps_ref_mb_info->u2_mb_type == B16x16)) + { + ps_ilp_mv_outputs->s_ilp_mv.e_mb_type = ps_ref_mb_info->u2_mb_type; + + 
ps_ilp_mv_outputs->s_ilp_mv.ae_pred_mode[0] = ps_ref_mb_info->as_pu->u1_pred_mode; + + if(ps_ilp_mv_outputs->s_ilp_mv.ae_pred_mode[0] != L0) + { + ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L1] = ps_ref_mb_info->as_pu->as_me_info[L1]; + } + + if(ps_ilp_mv_outputs->s_ilp_mv.ae_pred_mode[0] != L1) + { + ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L0] = ps_ref_mb_info->as_pu->as_me_info[L0]; + } + } + else + { + ps_ilp_mv_outputs->s_ilp_mv.e_mb_type = INVALID_MB_TYPE; + } + + /* Function call to get non 16x16 ilp mvs for me candidates */ + isvce_get_ilp_mvs_for_me(ps_ilp_mv_ctxt); + + /* Encoder supports only 16x16 partition. */ + /* The code below ensures only 16x16 ILP MV's are used */ + for(i = 0; i < MAX_PU_IN_MB_COL; i++) + { + for(j = 0; j < MAX_PU_IN_MB_ROW; j++) + { + bool b_unsupported_mv; + + ps_ref_mb_info = + &ps_ref_layer_data->ps_mb_info[ps_mb_state->as_mb_positions[i][j].i4_abscissa + + ps_mb_state->as_mb_positions[i][j].i4_ordinate * + s_ref_frame_dims_in_mbs.i4_abscissa]; + + b_unsupported_mv = + (ps_ref_mb_info->u2_mb_type != ps_ilp_mv_outputs->s_ilp_mv.e_mb_type) || + (ps_ilp_mv_outputs->s_ilp_mv.ae_pred_mode[0] != + ps_ref_mb_info->as_pu->u1_pred_mode) || + !isvce_check_identical_mv(ps_ilp_mv_outputs->s_ilp_mv.as_mv[0], + ps_ref_mb_info->as_pu->as_me_info, + ps_ilp_mv_outputs->s_ilp_mv.ae_pred_mode[0]); + + if(b_unsupported_mv) + { + ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L0] = s_default_mv; + ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L1] = s_default_mv; + ps_ilp_mv_outputs->s_ilp_mv.e_mb_type = INVALID_MB_TYPE; + + return; + } + } + } + + if(ps_ilp_mv_outputs->s_ilp_mv.e_mb_type != INVALID_MB_TYPE) + { + if(ps_ilp_mv_outputs->s_ilp_mv.ae_pred_mode[0] != L0) + { + ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L1].s_mv.i2_mvx = + (ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L1].s_mv.i2_mvx * + ps_layer_state->s_mv_scale.i4_abscissa + + 32768) >> + 16; + ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L1].s_mv.i2_mvy = + (ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L1].s_mv.i2_mvy * + 
ps_layer_state->s_mv_scale.i4_ordinate + + 32768) >> + 16; + } + + if(ps_ilp_mv_outputs->s_ilp_mv.ae_pred_mode[0] != L1) + { + ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L0].s_mv.i2_mvx = + (ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L0].s_mv.i2_mvx * + ps_layer_state->s_mv_scale.i4_abscissa + + 32768) >> + 16; + ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L0].s_mv.i2_mvy = + (ps_ilp_mv_outputs->s_ilp_mv.as_mv[0][L0].s_mv.i2_mvy * + ps_layer_state->s_mv_scale.i4_ordinate + + 32768) >> + 16; + } + } + else + { + ps_ilp_mv_outputs->s_ilp_mv.e_mb_type = INVALID_MB_TYPE; + ps_ilp_mv_outputs->s_ilp_mv.ae_pred_mode[0] = INVALID_PRED_MODE; + } +} + +void isvce_mvp_idx_eval(isvce_mb_info_t *ps_mb_info, isvce_enc_pu_mv_t *ps_spatial_mvp, + isvce_enc_pu_mv_t *ps_ilp_mvp, UWORD8 *pu1_mvd_costs) +{ + if(USE_ILP_MV_AS_MVP && ps_ilp_mvp && !ps_mb_info->u1_is_intra && + (ps_mb_info->u2_mb_type != PSKIP) && (ps_mb_info->u2_mb_type != BSKIP) && + (ps_mb_info->u2_mb_type != BASE_MODE)) + { + isvce_enc_pu_mv_t *ps_mv; + isvce_enc_pu_mv_t *aps_mvps[2]; + + WORD32 ai4_mvd_costs[2]; + WORD32 i, j; + + for(i = 0; i < NUM_PRED_DIRS; i++) + { + PRED_MODE_T e_pred_mode = (PRED_MODE_T) i; + PRED_MODE_T e_cmpl_pred_mode = (e_pred_mode == L0) ? 
L1 : L0; + + if(ps_mb_info->as_pu->u1_pred_mode != e_pred_mode) + { + ps_mv = &ps_mb_info->as_pu->as_me_info[e_cmpl_pred_mode]; + aps_mvps[0] = &ps_spatial_mvp[e_cmpl_pred_mode]; + aps_mvps[1] = &ps_ilp_mvp[e_cmpl_pred_mode]; + + for(j = 0; j < 2; j++) + { + if((aps_mvps[j]->i1_ref_idx != -1) && + (!j || ((j == 1) && (ps_mv->i1_ref_idx == aps_mvps[j]->i1_ref_idx)))) + { + ai4_mvd_costs[j] = + pu1_mvd_costs[ps_mv->s_mv.i2_mvx - aps_mvps[j]->s_mv.i2_mvx] + + pu1_mvd_costs[ps_mv->s_mv.i2_mvy - aps_mvps[j]->s_mv.i2_mvy]; + } + else + { + ai4_mvd_costs[j] = INT32_MAX; + } + } + + ps_mb_info->as_pu->au1_mvp_idx[e_cmpl_pred_mode] = + ai4_mvd_costs[0] > ai4_mvd_costs[1]; + } + else + { + ps_mb_info->as_pu->au1_mvp_idx[e_cmpl_pred_mode] = 0; + } + } + } + else + { + ps_mb_info->as_pu->au1_mvp_idx[L0] = 0; + ps_mb_info->as_pu->au1_mvp_idx[L1] = 0; + } +} diff --git a/encoder/svc/isvce_ilp_mv.h b/encoder/svc/isvce_ilp_mv.h new file mode 100644 index 0000000..f9d1df4 --- /dev/null +++ b/encoder/svc/isvce_ilp_mv.h @@ -0,0 +1,115 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_ilp_mv.h
+*
+* @brief
+*  Contains the public structs and declarations for the functions defined in
+*  isvce_ilp_mv.c
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_ILP_MV_H_
+#define _ISVCE_ILP_MV_H_
+
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "isvc_macros.h"
+#include "ih264_debug.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvce_defs.h"
+#include "isvce_pred_structs.h"
+#include "isvce_structs.h"
+#include "isvce_structs.h"
+#include "isvce_utils.h"
+
+/* Structs */
+/* Opaque handle to the module's private state; isvce_ilp_mv.c casts it */
+/* to 'ilp_mv_state_t *' */
+typedef struct ilp_mv_constants_t
+{
+    void *pv_state;
+} ilp_mv_constants_t;
+
+/* Results: the ILP MV of the MB and the ILP MV candidates for ME */
+typedef struct ilp_mv_outputs_t
+{
+    ilp_mv_t s_ilp_mv;
+
+    ilp_me_cands_t s_ilp_me_cands;
+
+} ilp_mv_outputs_t;
+
+/* Per-MB inputs: inter layer data, MB position and spatial layer id */
+typedef struct ilp_mv_variables_t
+{
+    svc_ilp_data_t *ps_svc_ilp_data;
+
+    coordinates_t s_mb_pos;
+
+    UWORD8 u1_spatial_layer_id;
+} ilp_mv_variables_t;
+
+/* Context that groups the constants, variables and outputs above */
+typedef struct svc_ilp_mv_ctxt_t
+{
+    ilp_mv_constants_t s_ilp_mv_constants;
+
+    ilp_mv_variables_t s_ilp_mv_variables;
+
+    ilp_mv_outputs_t s_ilp_mv_outputs;
+
+} svc_ilp_mv_ctxt_t;
+
+/* Function declarations */
+extern UWORD32 isvce_get_ilp_mv_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio,
+                                          UWORD32 u4_wd, UWORD32 u4_ht);
+
+extern void isvce_ilp_mv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
+
+extern void isvce_get_mb_ilp_mv(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt);
+
+extern void isvce_mvp_idx_eval(isvce_mb_info_t *ps_mb_info, isvce_enc_pu_mv_t *ps_spatial_mvp,
+                               isvce_enc_pu_mv_t *ps_ilp_mvp, UWORD8 *pu1_mvd_costs);
+
+/* Returns the result of 'isvce_check_identical_mv' between the MB's MV's */
+/* and the ILP MV when ENABLE_ILP_MV is set, 'ps_ilp_mv' is not NULL, the */
+/* MB is neither PSKIP nor BSKIP, and the MB type and pred mode match */
+/* those of the ILP MV. Returns 0 in every other case */
+static FORCEINLINE UWORD8 isvce_is_ilp_mv_winning_mv(isvce_mb_info_t *ps_mb_info,
+                                                     ilp_mv_t *ps_ilp_mv)
+{
+    if(ENABLE_ILP_MV && ps_ilp_mv && (ps_mb_info->u2_mb_type != PSKIP) &&
+       (ps_mb_info->u2_mb_type != BSKIP))
+    {
+        if((ps_mb_info->u2_mb_type == ps_ilp_mv->e_mb_type) &&
+           (((PRED_MODE_T) ps_mb_info->as_pu->u1_pred_mode) == ps_ilp_mv->ae_pred_mode[0]))
+        {
+            return isvce_check_identical_mv(ps_mb_info->as_pu->as_me_info, ps_ilp_mv->as_mv[0],
+                                            ps_ilp_mv->ae_pred_mode[0]);
+        }
+    }
+
+    return 0;
+}
+
+#endif
diff --git a/encoder/svc/isvce_ilp_mv_private_defs.h b/encoder/svc/isvce_ilp_mv_private_defs.h
new file mode 100644
index 0000000..2893ca0
--- /dev/null
+++ b/encoder/svc/isvce_ilp_mv_private_defs.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_ilp_mv_private_defs.h
+*
+* @brief
+*  Contains datatype and macro definitions used exclusively in
+*  ILP MV derivations
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_ILP_MV_PRIVATE_DEFS_H_
+#define _ISVCE_ILP_MV_PRIVATE_DEFS_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvce_structs.h"
+
+/* Structs */
+/* Offsets, etc used for resLayer MV upsampling */
+/* Derived as per 'G.8.6.1.1' for all MB's once during init */
+typedef struct ilp_mv_mb_state_t
+{
+    coordinates_t as_pu_positions[MAX_PU_IN_MB_COL][MAX_PU_IN_MB_ROW];
+
+    coordinates_t as_mb_positions[MAX_PU_IN_MB_COL][MAX_PU_IN_MB_ROW];
+} ilp_mv_mb_state_t;
+
+/* Per-layer state: resampler properties, per-MB position tables and the */
+/* MV scale factors applied as '(mv * scale + 32768) >> 16' in isvce_ilp_mv.c */
+typedef struct ilp_mv_layer_state_t
+{
+    layer_resampler_props_t *ps_props;
+
+    ilp_mv_mb_state_t *ps_mb_states;
+
+    coordinates_t s_mv_scale;
+
+} ilp_mv_layer_state_t;
+
+typedef struct ilp_mv_state_t
+{
+    /* Array of size numSpatialLayers */
+    ilp_mv_layer_state_t *ps_layer_state;
+
+} ilp_mv_state_t;
+
+#endif
diff --git a/encoder/svc/isvce_ilp_mv_utils.h b/encoder/svc/isvce_ilp_mv_utils.h
new file mode 100644
index 0000000..af9708c
--- /dev/null
+++ b/encoder/svc/isvce_ilp_mv_utils.h
@@ -0,0 +1,111 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvce_ilp_mv_utils.h +* +* @brief +* Defs to perform experiments in ilp mv +* +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _ISVCE_ILP_MV_UTILS_H_ +#define _ISVCE_ILP_MV_UTILS_H_ + +#include + +#include "ih264_typedefs.h" +#include "isvc_defs.h" +#include "isvc_macros.h" +#include "isvce_pred_structs.h" +#include "isvce_structs.h" + +#define MAX_CAND_IF_NUM_ILP_MV_LT_2 8 +#define MAX_CAND_IF_NUM_ILP_MV_GTEQ_2 6 + +/* nbr_mb.x, nbr_mb.y, pu_pos.x, pu_pos.y */ +#define NBR_PU_AND_MB_POS 4 + +static const WORD8 gai1_nbr_ilp_mv_map[MAX_ILP_MV_IN_NBR_RGN][NBR_PU_AND_MB_POS] = { + {-1, 0, 3, 0}, + {0, -1, 0, 3}, + {1, 0, 0, 0}, + {0, 1, 0, 0}, +}; + +/** +******************************************************************************* +* +* @brief +* Checks that every pair of ILP MVs is usable for 'i4_reflist' (pred mode is +* 'i4_reflist' or BI) and differs by less than four in both MV components +* +* @param[in] ps_ilp_me_cands +* Pointer to ilp_me_cands +* @param[in] i4_reflist +* Reference list index; also cast to PRED_MODE_T for the pred mode check +* @returns true if no pair of ILP MVs fails the check (also when there are +* fewer than two ILP MVs, as no pair is examined then); false otherwise +* @remarks none +* +******************************************************************************* +*/ +static FORCEINLINE bool isvce_check_max_mv_diff_lt_4(ilp_me_cands_t *ps_ilp_me_cands, + WORD32 i4_reflist) +{ + UWORD32 i, j; + UWORD32 u4_mv_diff_x, u4_mv_diff_y; + + for(i = 1; i < ps_ilp_me_cands->u4_num_ilp_mvs; i++) + { + for(j = 0; j < i; j++) + { + if(((ps_ilp_me_cands->ae_pred_mode[i] == ((PRED_MODE_T) i4_reflist)) || +
((ps_ilp_me_cands->ae_pred_mode[i] == BI))) && + ((ps_ilp_me_cands->ae_pred_mode[j] == ((PRED_MODE_T) i4_reflist)) || + ((ps_ilp_me_cands->ae_pred_mode[j] == BI)))) + { + u4_mv_diff_x = ABS(ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv.i2_mvx - + ps_ilp_me_cands->as_mv[j][i4_reflist].s_mv.i2_mvx); + + u4_mv_diff_y = ABS(ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv.i2_mvy - + ps_ilp_me_cands->as_mv[j][i4_reflist].s_mv.i2_mvy); + + if(u4_mv_diff_x >= 4 || u4_mv_diff_y >= 4) + { + return false; /* MVs of candidates i and j differ by 4 or more */ + } + } + else + { + return false; /* candidate i or j has a pred mode other than i4_reflist or BI */ + } + } + } + + return true; +} + +#endif diff --git a/encoder/svc/isvce_interface_structs.h b/encoder/svc/isvce_interface_structs.h new file mode 100644 index 0000000..3ac59fb --- /dev/null +++ b/encoder/svc/isvce_interface_structs.h @@ -0,0 +1,116 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_interface_structs.h +* +* @brief +* Contains struct definitions used for interface objects such as input, +* output, and rec +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_INTERFACE_STRUCTS_H_ +#define _ISVCE_INTERFACE_STRUCTS_H_ + +#include "isvc_structs.h" + +typedef struct isvce_raw_inp_buf_t +{ + /** Descriptor of raw buffer */ + iv_raw_buf_t s_raw_buf; + + /** Lower 32 bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_low; + + /** Upper 32 bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_high; + + /** Flag to indicate if the current buffer is last buffer */ + UWORD32 u4_is_last; + + /** Flag to indicate if mb info is sent along with input buffer */ + UWORD32 u4_mb_info_type; + + /** Size of the mb info structure */ + UWORD32 u4_mb_info_size; + + /** Buffer containing mb info if u4_mb_info_type is non-zero */ + void *pv_mb_info; + + /** Flag to indicate if pic info is sent along with input buffer */ + UWORD32 u4_pic_info_type; + + /** Buffer containing pic info if u4_pic_info_type is non-zero */ + void *pv_pic_info; + + /** SEI CCV params flag */ + UWORD8 u1_sei_ccv_params_present_flag; + + /** SEI CCV params info */ + sei_ccv_params_t s_sei_ccv; + +} isvce_raw_inp_buf_t; + +typedef struct +{ + /** Descriptors of bitstream buffers (array of MAX_NUM_SPATIAL_LAYERS entries) */ + iv_bits_buf_t as_bits_buf[MAX_NUM_SPATIAL_LAYERS]; + + /** Lower 32 bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_low; + + /** Upper 32 bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_high; + + /** Flag to indicate if the current buffer is last buffer */ + UWORD32 u4_is_last; + +} isvce_out_buf_t; + +typedef struct +{ + /** Descriptor of picture buffer */ +
svc_au_buf_t s_pic_buf; + + /** Lower 32 bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_low; + + /** Upper 32 bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_high; + + /** Flag to indicate if the current buffer is last buffer */ + UWORD32 u4_is_last; + + /** Picture count corresponding to current picture */ + WORD32 i4_pic_cnt; + +} isvce_rec_buf_t; + +#endif diff --git a/encoder/svc/isvce_intra_modes_eval.c b/encoder/svc/isvce_intra_modes_eval.c new file mode 100644 index 0000000..58eb7b9 --- /dev/null +++ b/encoder/svc/isvce_intra_modes_eval.c @@ -0,0 +1,2334 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_intra_modes_eval.c +* +* @brief +* This file contains definitions of routines that perform rate distortion +* analysis on a macroblock if it is to be coded as intra.
+* +* @author +* ittiam +* +* @par List of Functions: +* - isvce_derive_neighbor_availability_of_mbs() +* - isvce_derive_ngbr_avbl_of_mb_partitions() +* - isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff() +* - isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff() +* - isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff() +* - isvce_evaluate_intra4x4_modes_for_least_cost_rdopton() +* - isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff() +* - isvce_evaluate_intra16x16_modes() +* - isvce_evaluate_intra4x4_modes() +* - isvce_evaluate_intra_chroma_modes() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include +#include + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_debug.h" +#include "isvc_defs.h" +#include "isvc_macros.h" +#include "ih264_intra_pred_filters.h" +#include "isvc_structs.h" +#include "isvc_common_tables.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_size_defs.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "isvce_defs.h" +#include "ime_distortion_metrics.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "ih264e_intra_modes_eval.h" +#include "isvce_globals.h" +#include "ime_platform_macros.h" + +/*****************************************************************************/ 
+/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief +* derivation process for subblock/partition availability +* +* @par Description +* Looks up the availability of the block that contains the given pel position: +* the current mb, or its left, top, topright or topleft neighbor. +* +* @param[in] ps_ngbr_avbl +* pointer to the neighbor availability (u1_mb_a/b/c/d) of the current mb +* +* @param[in] i1_pel_pos_x +* column position of the pel wrt the current macroblock (0-15 lie inside it) +* +* @param[in] i1_pel_pos_y +* row position of the pel wrt the current macroblock (0-15 lie inside it) +* +* @remarks Assumptions: before calling this function it is assumed that +* the neighbor availability of the current macroblock is already derived. +* Based on table 6-3 of H264 specification +* +* @return availability status (yes or no); right and bottom positions yield 0 +* +****************************************************************************** +*/ +UWORD8 isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl, WORD8 i1_pel_pos_x, + WORD8 i1_pel_pos_y) +{ + UWORD8 u1_neighbor_avail = 0; + + /**********************************************************************/ + /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */ + /* various columns of a macroblock */ + /* */ + /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */ + /* various rows of a macroblock */ + /* */ + /* other values of i1_pel_pos_x & i1_pel_pos_y represent elements */ + /* outside the bounds of an mb, i.e. they represent its neighbors.
*/ + /**********************************************************************/ + if(i1_pel_pos_x < 0) + { /* column(-1) */ + if(i1_pel_pos_y < 0) + { /* row(-1) */ + u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */ + } + else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16) + { /* all rows of a macroblock */ + u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */ + } + else /* if (i1_pel_pos_y >= 16) */ + { /* rows(+16) */ + u1_neighbor_avail = 0; /* current mb bottom left availability */ + } + } + else if(i1_pel_pos_x >= 0 && i1_pel_pos_x < 16) + { /* all columns of a macroblock */ + if(i1_pel_pos_y < 0) + { /* row(-1) */ + u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */ + } + else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16) + { /* all rows of a macroblock */ + u1_neighbor_avail = 1; /* current mb availability */ + /* availability of the partition is dependent on the position of the + * partition inside the mb */ + /* although the availability is declared as 1 in all cases this needs to + * be corrected somewhere else; it is not done in here */ + } + else /* if (i1_pel_pos_y >= 16) */ + { /* rows(+16) */ + u1_neighbor_avail = 0; /* current mb bottom availability */ + } + } + else if(i1_pel_pos_x >= 16) + { /* column(+16) */ + if(i1_pel_pos_y < 0) + { /* row(-1) */ + u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */ + } + else /* if (i1_pel_pos_y >= 0) */ + { /* all other rows */ + u1_neighbor_avail = 0; /* current mb right & bottom right availability */ + } + } + + return u1_neighbor_avail; +} + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 16x16 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible intra 16x16 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the
bit-stream. +* +* @param[in] ps_proc_ctxt +* pointer to process context (handle) +* +* @remarks +* Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to +*header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp +*bits and residual bits fall in to texture bits the number of bits taken to +*encoding mbtype is considered as rate, we compute cost. Further we will +*approximate the distortion as the deviation b/w input and the predicted block +*as opposed to input and reconstructed block. +* +* NOTE: As per the Document JVT-O079, for intra 16x16 macroblock, +* the SAD and cost are one and the same. +* +* @return none +* +****************************************************************************** +*/ + +void isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc) +{ + /* Codec Context */ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns; + + /* SAD (distortion metric) of the 16x16 macroblock */ + WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX; + + /* lambda */ + UWORD32 u4_lambda = ps_proc->u4_lambda; + + /* cost = distortion + lambda*rate */ + WORD32 i4_mb_cost = INT_MAX, i4_mb_cost_least = INT_MAX; + + /* intra mode */ + UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16; + + /* neighbor pels for intra prediction */ + UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels; + + /* neighbor availability */ + WORD32 i4_ngbr_avbl; + + /* pointers to the src and rec macro blocks */ + UWORD8 *pu1_curr_mb = ps_proc->s_src_buf_props.as_component_bufs[0].pv_data; + UWORD8
*pu1_ref_mb = ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16; + UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane; + + /* strides */ + WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + WORD32 i4_rec_strd = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride; + + /* pointer to neighbors left, top, topleft (taken from the rec buffer) */ + UWORD8 *pu1_mb_a = pu1_ref_mb - 1; + UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd; + UWORD8 *pu1_mb_d = pu1_mb_b - 1; + UWORD8 u1_mb_a, u1_mb_b, u1_mb_d; + /* valid intra modes map */ + UWORD32 u4_valid_intra_modes; + + /* lut for valid intra modes, indexed by i4_ngbr_avbl (left + (topleft << 1) + (top << 2)) */ + const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15}; + + UWORD32 i, u4_enable_fast_sad = 0, offset = 0; + isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; + UWORD32 u4_constrained_intra_pred = + ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id]; + + if(ps_proc->i4_slice_type != ISLICE) + { + /* Offset for MBtype */ + offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23; + u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad; + } + + /* locating neighbors that are available for prediction */ + + /* gather prediction pels from the neighbors, if particular set is not + * available it is set to zero */ + /* left pels: with constrained intra pred the left mb must also be intra with u1_base_mode_flag unset */ + u1_mb_a = + ((ps_proc->ps_ngbr_avbl->u1_mb_a) && + (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra && + !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag) + : 1)); + if(u1_mb_a) + { + for(i = 0; i < 16; i++) pu1_ngbr_pels_i16[16 - 1 - i] = pu1_mb_a[i * i4_rec_strd]; + } + else + { + ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_i16, 0, MB_SIZE); + } + /* top pels (same constrained intra pred rule, applied to the top mb) */ + u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) && + (u4_constrained_intra_pred + ?
(ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag) + : 1)); + if(u1_mb_b) + { + ps_mem_fxns->pf_mem_cpy_mul8(pu1_ngbr_pels_i16 + 16 + 1, pu1_mb_b, 16); + } + else + { + ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_i16 + 16 + 1, 0, MB_SIZE); + } + /* topleft pels (same constrained intra pred rule, applied to the topleft mb) */ + u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) && + (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra && + !ps_top_mb_syn_ele[-1].u1_base_mode_flag) + : 1)); + if(u1_mb_d) + { + pu1_ngbr_pels_i16[16] = *pu1_mb_d; + } + else + { + pu1_ngbr_pels_i16[16] = 0; + } + + i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1); + ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl; + + /* set valid intra modes for evaluation */ + u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; + + if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST || + ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) + u4_valid_intra_modes &= ~(1 << PLANE_I16x16); /* plane mode is not evaluated for these presets */ + + /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */ + ps_codec->pf_ih264e_evaluate_intra16x16_modes( + pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16, i4_src_strd, i4_pred_strd, + i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least, u4_valid_intra_modes); + + /* only the distortion is used here; the lambda*rate term is added once the best mode is known */ + i4_mb_cost_least = i4_mb_distortion_least; + + if(((u4_valid_intra_modes >> 3) & 1) != 0) + { + /* intra prediction for PLANE mode */ + (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16]( + pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl); + + /* evaluate distortion between the actual blk and the estimated blk for the + * given mode */ + ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad]( + pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, + &i4_mb_distortion); + + /* cost = distortion (rate term not included at this point) */ + i4_mb_cost = i4_mb_distortion; + + /* update the least cost information if necessary */ + if(i4_mb_cost < i4_mb_distortion_least) + { + u4_intra_mode =
PLANE_I16x16; + + i4_mb_cost_least = i4_mb_cost; + i4_mb_distortion_least = i4_mb_distortion; + } + } + + u4_best_intra_16x16_mode = u4_intra_mode; + + DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode); + + ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode; + + /* cost = distortion + lambda*rate */ + i4_mb_cost_least = + i4_mb_distortion_least + u4_lambda * u1_uev_codelength[offset + u4_best_intra_16x16_mode]; + + /* update the type of the mb if necessary */ + if(i4_mb_cost_least < ps_proc->i4_mb_cost) + { + ps_proc->i4_mb_cost = i4_mb_cost_least; + ps_proc->i4_mb_distortion = i4_mb_distortion_least; + ps_proc->ps_mb_info->u2_mb_type = I16x16; + } +} + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 8x8 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible intra 8x8 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. +* +* @param[in] ps_proc +* pointer to proc ctxt +* +* @remarks Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to +* header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp +* bits and residual bits fall in to texture bits the number of bits taken to +* encoding mbtype is considered as rate, we compute cost. Further we will +* approximate the distortion as the deviation b/w input and the predicted block +* as opposed to input and reconstructed block.
+* +* NOTE: TODO: This function needs to be tested +* +* @return none +* +****************************************************************************** +*/ +void isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc) +{ + /* Codec Context */ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + /* SAD (distortion metric) of an 8x8 block */ + WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, + i4_total_distortion = 0; + + /* lambda */ + UWORD32 u4_lambda = ps_proc->u4_lambda; + + /* cost = distortion + lambda*rate */ + WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda; + + /* mode cost: one bit when the mode equals the estimated mode, four bits otherwise */ + UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; + + /* intra mode */ + UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode; + + /* neighbor pels for intra prediction */ + UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels; + + /* pointer to curr partition */ + UWORD8 *pu1_mb_curr; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; + + /* strides */ + WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + + /* neighbors left, top, top left */ + UWORD8 *pu1_mb_a; + UWORD8 *pu1_mb_b; + UWORD8 *pu1_mb_d; + + /* neighbor availability */ + WORD32 i4_ngbr_avbl; + block_neighbors_t s_ngbr_avbl; + + /* temp vars */ + UWORD32 b8, u4_pix_x, u4_pix_y; + UWORD32 u4_constrained_intra_pred = + ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id]; + block_neighbors_t s_ngbr_avbl_MB; + + /* ngbr mb syntax information */ + UWORD8 *pu1_top_mb_intra_modes = + ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes; + isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; + isvce_mb_info_t *ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1; + /* valid intra modes
map */ + UWORD32 u4_valid_intra_modes; + + if(ps_proc->ps_ngbr_avbl->u1_mb_c) + { + ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1; /* NOTE(review): same value as the initializer, so this branch has no effect */ + } + /* left pels: with constrained intra pred the left mb must also be intra with u1_base_mode_flag unset */ + s_ngbr_avbl_MB.u1_mb_a = + ((ps_proc->ps_ngbr_avbl->u1_mb_a) && + (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra && + !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag) + : 1)); + + /* top pels */ + s_ngbr_avbl_MB.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) && + (u4_constrained_intra_pred ? (ps_top_mb_syn_ele->u1_is_intra && + !ps_top_mb_syn_ele->u1_base_mode_flag) + : 1)); + + /* topleft pels */ + s_ngbr_avbl_MB.u1_mb_d = + ((ps_proc->ps_ngbr_avbl->u1_mb_d) && + (u4_constrained_intra_pred + ? (ps_top_mb_syn_ele[-1].u1_is_intra && !ps_top_mb_syn_ele[-1].u1_base_mode_flag) + : 1)); + + /* top right */ + s_ngbr_avbl_MB.u1_mb_c = + ((ps_proc->ps_ngbr_avbl->u1_mb_c) && + (u4_constrained_intra_pred ? (ps_top_right_mb_syn_ele->u1_is_intra && + !ps_top_right_mb_syn_ele->u1_base_mode_flag) + : 1)); + + for(b8 = 0; b8 < 4; b8++) + { + u4_pix_x = (b8 & 0x01) << 3; + u4_pix_y = (b8 >> 1) << 3; + + pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + + u4_pix_x + (u4_pix_y * i4_src_strd); + /* when rdopt is off, we use the input as reference for constructing + * prediction buffer */ + /* as opposed to using the recon pels. (open loop intra prediction) */ + pu1_mb_a = pu1_mb_curr - 1; /* pointer to the pel left of the partition */ + pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to the pel above the partition */ + pu1_mb_d = pu1_mb_b - 1; /* pointer to the pel top left of the partition */ + + /* locating neighbors that are available for prediction */ + /* TODO : update the neighbor availability information basing on constrained + * intra pred information */ + /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be + * split in to distinct routines */ + /* basing on neighbors available and hence evade the computation of neighbor + * availability totally.
*/ + s_ngbr_avbl.u1_mb_a = isvce_derive_ngbr_avbl_of_mb_partitions( + &s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */ + s_ngbr_avbl.u1_mb_b = isvce_derive_ngbr_avbl_of_mb_partitions( + &s_ngbr_avbl_MB, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */ + s_ngbr_avbl.u1_mb_c = isvce_derive_ngbr_avbl_of_mb_partitions( + &s_ngbr_avbl_MB, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */ + s_ngbr_avbl.u1_mb_d = isvce_derive_ngbr_avbl_of_mb_partitions( + &s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */ + + /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * + * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * + * TOP_LEFT_MB_AVAILABLE_MASK */ + i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + + (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) + + (s_ngbr_avbl.u1_mb_a << 4); /* bit 4 carries the left availability a second time */ + /* if top partition is available and top right is not available for intra + * prediction, then */ + /* pad top right samples using top sample and make top right also available + */ + /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + + * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | + * s_ngbr_avbl.u1_mb_c) << 3); */ + ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl; + + ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8, + i4_src_strd, i4_ngbr_avbl); + + i4_partition_cost_least = INT_MAX; + /* set valid intra modes for evaluation: start with all nine mode bits set. NOTE(review): the I4x4 enumerators below are used as bit positions; presumably they match the I8x8 ones - confirm */ + u4_valid_intra_modes = 0x1ff; + + if(!s_ngbr_avbl.u1_mb_b) + { + u4_valid_intra_modes &= ~(1 << VERT_I4x4); + u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4); + u4_valid_intra_modes &= ~(1 << VERT_L_I4x4); + } + if(!s_ngbr_avbl.u1_mb_a) + { + u4_valid_intra_modes &= ~(1 << HORZ_I4x4); + u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4); + } + if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d) + { + u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4); + u4_valid_intra_modes &= ~(1 <<
VERT_R_I4x4); + u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4); + } + + /* estimate the intra 8x8 mode for the current partition (for evaluating + * cost): DC if left or top is unavailable, else the smaller of the left and top modes */ + if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) + { + u4_estimated_intra_8x8_mode = DC_I8x8; + } + else + { + UWORD32 u4_left_intra_8x8_mode = DC_I8x8; + UWORD32 u4_top_intra_8x8_mode = DC_I8x8; + + if(u4_pix_x == 0) + { + if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8) + { + u4_left_intra_8x8_mode = + ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1]; + } + else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4) + { + u4_left_intra_8x8_mode = ps_proc->s_nbr_info.ps_left_mb_intra_modes + ->au1_intra_modes[(b8 + 1) * 4 + 2]; + } + } + else + { + u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8 - 1]; + } + + if(u4_pix_y == 0) + { + if(ps_top_mb_syn_ele->u2_mb_type == I8x8) + { + u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8 + 2]; + } + else if(ps_top_mb_syn_ele->u2_mb_type == I4x4) + { + u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8 + 2) * 4 + 2]; + } + } + else + { + u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8 - 2]; + } + + u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode); + } + + /* perform intra mode 8x8 evaluation: walk the valid mode bits from the lsb up */ + for(u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; + u4_intra_mode++, u4_valid_intra_modes >>= 1) + { + if((u4_valid_intra_modes & 1) == 0) continue; + + /* intra prediction */ + (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, + i4_pred_strd, i4_ngbr_avbl); + + /* evaluate distortion between the actual blk and the estimated blk for + * the given mode */ + ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, + i4_partition_cost_least, &i4_partition_distortion); + + i4_partition_cost = + i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode) + ?
u4_cost_one_bit + : u4_cost_four_bits); + + /* update the least cost information if necessary */ + if(i4_partition_cost < i4_partition_cost_least) + { + i4_partition_cost_least = i4_partition_cost; + i4_partition_distortion_least = i4_partition_distortion; + u4_best_intra_8x8_mode = u4_intra_mode; + } + } + /* accumulate macroblock cost and distortion */ + i4_total_cost += i4_partition_cost_least; + i4_total_distortion += i4_partition_distortion_least; + /* mb partition mode */ + ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode; + } + + /* update the type of the mb if necessary */ + if(i4_total_cost < ps_proc->i4_mb_cost) + { + ps_proc->i4_mb_cost = i4_total_cost; + ps_proc->i4_mb_distortion = i4_total_distortion; + ps_proc->ps_mb_info->u2_mb_type = I8x8; + } +} + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 4x4 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible intra 4x4 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. +* +* @param[in] ps_proc +* pointer to proc ctxt +* +* @remarks +* Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to +* header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp +* bits and residual bits fall in to texture bits the number of bits taken to +* encoding mbtype is considered as rate, we compute cost. Further we will +* approximate the distortion as the deviation b/w input and the predicted block +* as opposed to input and reconstructed block.
+* +* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, +* 24*lambda is added to the SAD before comparison with the best SAD for +* inter prediction. This is an empirical value to prevent using too many intra +* blocks. +* +* @return none +* +****************************************************************************** +*/ +void isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc) +{ + /* Codec Context */ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + /* SAD(distortion metric) of an 4x4 block */ + WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; + + /* lambda */ + UWORD32 u4_lambda = ps_proc->u4_lambda; + + /* cost = distortion + lambda*rate */ + WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; + + /* cost due to mbtype */ + UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; + + /* intra mode */ + UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode; + + /* neighbor pels for intra prediction */ + UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; + + /* pointer to curr partition */ + UWORD8 *pu1_mb_curr; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; + + /* strides */ + WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + + /* neighbors left, top, top right, top left */ + UWORD8 *pu1_mb_a; + UWORD8 *pu1_mb_b; + UWORD8 *pu1_mb_c; + UWORD8 *pu1_mb_d; + + /* neighbor availability */ + WORD32 i4_ngbr_avbl; + block_neighbors_t s_ngbr_avbl; + + /* temp vars */ + UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y; + + /* ngbr sub mb modes */ + UWORD8 *pu1_top_mb_intra_modes = + ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes; + isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; + isvce_mb_info_t *ps_top_right_mb_syn_ele = 
ps_top_mb_syn_ele + 1; + + /* valid intra modes map */ + UWORD32 u4_valid_intra_modes; + UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; + + UWORD32 u4_constrained_intra_pred = + ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id]; + UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d; + if(ps_proc->ps_ngbr_avbl->u1_mb_c) + { + ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1; + } + /* left pels */ + u1_mb_a = + ((ps_proc->ps_ngbr_avbl->u1_mb_a) && + (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra && + !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag) + : 1)); + + /* top pels */ + u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) && + (u4_constrained_intra_pred + ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag) + : 1)); + + /* topleft pels */ + u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) && + (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra && + !ps_top_mb_syn_ele[-1].u1_base_mode_flag) + : 1)); + + /* top right */ + u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) && + (u4_constrained_intra_pred ? (ps_top_right_mb_syn_ele->u1_is_intra && + !ps_top_right_mb_syn_ele->u1_base_mode_flag) + : 1)); + + i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3); + memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); + + for(b8 = 0; b8 < 4; b8++) + { + u4_blk_x = (b8 & 0x01) << 3; + u4_blk_y = (b8 >> 1) << 3; + for(b4 = 0; b4 < 4; b4++) + { + u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); + u4_pix_y = u4_blk_y + ((b4 >> 1) << 2); + + pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + + u4_pix_x + (u4_pix_y * i4_src_strd); + /* when rdopt is off, we use the input as reference for constructing + * prediction buffer */ + /* as opposed to using the recon pels. 
(open loop intra prediction) */ + pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */ + pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */ + pu1_mb_c = pu1_mb_b + 4; /* pointer to top macro block */ + pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */ + + /* locating neighbors that are available for prediction */ + /* TODO : update the neighbor availability information basing on + * constrained intra pred information */ + /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be + * split in to distinct routines */ + /* basing on neighbors available and hence evade the computation of + * neighbor availability totally. */ + + i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; + s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1); + s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1; + s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2; + s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3; + /* set valid intra modes for evaluation */ + u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7]; + + /* if top partition is available and top right is not available for intra + * prediction, then */ + /* padd top right samples using top sample and make top right also + * available */ + /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + + * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | + * s_ngbr_avbl.u1_mb_c) << 3); */ + + /* gather prediction pels from the neighbors */ + if(s_ngbr_avbl.u1_mb_a) + { + for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_src_strd]; + } + else + { + memset(pu1_ngbr_pels_i4, 0, 4); + } + + if(s_ngbr_avbl.u1_mb_b) + { + memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); + } + else + { + memset(pu1_ngbr_pels_i4 + 5, 0, 4); + } + + if(s_ngbr_avbl.u1_mb_d) + pu1_ngbr_pels_i4[4] = *pu1_mb_d; + else + pu1_ngbr_pels_i4[4] = 0; + + if(s_ngbr_avbl.u1_mb_c) + { + memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); + } + else if(s_ngbr_avbl.u1_mb_b) + { + 
memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); + s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; + } + + i4_partition_cost_least = INT_MAX; + + /* predict the intra 4x4 mode for the current partition (for evaluating + * cost) */ + if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) + { + u4_estimated_intra_4x4_mode = DC_I4x4; + } + else + { + UWORD32 u4_left_intra_4x4_mode = DC_I4x4; + UWORD32 u4_top_intra_4x4_mode = DC_I4x4; + + if(u4_pix_x == 0) + { + if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4) + { + u4_left_intra_4x4_mode = + ps_proc->s_nbr_info.ps_left_mb_intra_modes + ->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]]; + } + else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8) + { + u4_left_intra_4x4_mode = + ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1]; + } + } + else + { + u4_left_intra_4x4_mode = + ps_proc->au1_intra_luma_mb_4x4_modes + [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]]; + } + + if(u4_pix_y == 0) + { + if(ps_top_mb_syn_ele->u2_mb_type == I4x4) + { + u4_top_intra_4x4_mode = + pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]]; + } + else if(ps_top_mb_syn_ele->u2_mb_type == I8x8) + { + u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2]; + } + } + else + { + u4_top_intra_4x4_mode = + ps_proc->au1_intra_luma_mb_4x4_modes + [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]]; + } + + u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode); + } + + ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = + u4_estimated_intra_4x4_mode; + + /* mode evaluation and prediction */ + ps_codec->pf_ih264e_evaluate_intra_4x4_modes( + pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl, + &u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda, + u4_estimated_intra_4x4_mode); + + i4_partition_distortion_least = + i4_partition_cost_least - ((u4_estimated_intra_4x4_mode 
== u4_best_intra_4x4_mode) + ? u4_cost_one_bit + : u4_cost_four_bits); + + DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, + u4_best_intra_4x4_mode); + /* macroblock distortion */ + i4_total_distortion += i4_partition_distortion_least; + i4_total_cost += i4_partition_cost_least; + /* mb partition mode */ + ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode; + } + } + + /* update the type of the mb if necessary */ + if(i4_total_cost < ps_proc->i4_mb_cost) + { + ps_proc->i4_mb_cost = i4_total_cost; + ps_proc->i4_mb_distortion = i4_total_distortion; + ps_proc->ps_mb_info->u2_mb_type = I4x4; + } +} + +/** +****************************************************************************** +* +* @brief evaluate best intra 4x4 mode (rate distortion opt on) +* +* @par Description +* This function evaluates all the possible intra 4x4 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. +* +* @param[in] ps_proc_ctxt +* pointer to proc ctxt +* +* @remarks +* Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to +*header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp +*bits and residual bits fall in to texture bits the number of bits taken to +*encoding mbtype is considered as rate, we compute cost. Further we will +*approximate the distortion as the deviation b/w input and the predicted block +*as opposed to input and reconstructed block. +* +* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, +* 24*lambda is added to the SAD before comparison with the best SAD for +* inter prediction. 
This is an empirical value to prevent using too many intra
+* blocks.
+*
+* @return none
+*
+******************************************************************************
+*/
+void isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t *ps_proc)
+{ /* Closed loop (rdopt on) evaluation: every 4x4 sub-block is predicted from
+   * reconstructed pels, then passed through the 4x4 residue transform / quant
+   * and inverse quant / inverse transform / recon calls so that the following
+   * sub-blocks can use its reconstruction (pu1_ref_mb_intra_4x4) as reference.
+   * As in the rdopt off variant, the MB becomes I4x4 only when the summed cost
+   * is lower than ps_proc->i4_mb_cost */
+    block_neighbors_t s_ngbr_avbl;
+    buffer_container_t s_src;
+    buffer_container_t s_pred;
+    buffer_container_t s_recon;
+    buffer_container_t s_quant_coeffs;
+    buffer_container_t s_res_pred;
+
+    /* neighbors left, top, top right, top left */
+    UWORD8 *pu1_mb_a;
+    UWORD8 *pu1_mb_b;
+    UWORD8 *pu1_mb_c;
+    UWORD8 *pu1_mb_d;
+    UWORD8 *pu1_mb_curr;
+    UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
+    UWORD8 *pu1_ref_mb_intra_4x4;
+    WORD32 i4_ref_strd_left, i4_ref_strd_top;
+    WORD32 i4_ngbr_avbl;
+    UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
+    /* valid intra modes map: entry of u2_valid_modes selected for the current
+     * sub-block and passed to the evaluator */
+    UWORD32 u4_valid_intra_modes;
+    /* Dummy variable for 4x4 trans function; its value is not read here */
+    WORD16 i2_dc_dummy;
+    UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
+
+    isvce_codec_t *ps_codec = ps_proc->ps_codec;
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+    isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
+    isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
+    isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
+    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
+    enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
+    resi_trans_quant_constants_t s_resi_trans_quant_constants = {
+        .pu2_scale_matrix = ps_qp_params->pu2_scale_mat,
+        .pu2_threshold_matrix = ps_qp_params->pu2_thres_mat,
+        .u4_qbits = ps_qp_params->u1_qbits,
+        .u4_round_factor = ps_qp_params->u4_dead_zone};
+    iq_it_res_rec_constants_t s_iq_it_res_rec_constants = {
+        .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat,
+        .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat,
+        .u4_qp_div_6 = ps_qp_params->u1_qp_div};
+
+    const UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; /* indexed by the low
+                                     * three availability bits: bit 0 left, bit 1 top left,
+                                     * bit 2 top */
+    WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+    UWORD32 u4_lambda = ps_proc->u4_lambda;
+    WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; /* MB total starts with
+                                                                           * a (24 + 1) * lambda
+                                                                           * bias */
+    /* cost of signalling the mode: one lambda when the chosen mode equals the
+     * predicted mode, four lambda otherwise */
+    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+    UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
+    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+    WORD16 *pi2_quant_coeffs = ps_proc->pi2_res_buf_intra_4x4;
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+    WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    UWORD8 *pu1_nnz = (UWORD8 *) ps_proc->au4_nnz_intra_4x4;
+    UWORD8 *pu1_top_mb_intra_modes =
+        ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
+    UWORD32 u4_constrained_intra_pred =
+        ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
+    UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(0);
+    UWORD8 u1_iq_it_recon_fxn_idx = isvc_get_iq_it_recon_variant_idx(1, 0); /* indices into the
+                                      * apf_resi_trans_quant_4x4 and apf_iquant_itrans_recon_4x4
+                                      * tables used below */
+
+    s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[Y];
+
+    /* compute ngbr availability for sub blks.
+     * NOTE(review): the assignment inside this 'if' repeats the initialiser of
+     * ps_top_right_mb, so it has no effect */
+    if(ps_proc->ps_ngbr_avbl->u1_mb_c)
+    {
+        ps_top_right_mb = ps_top_mb + 1;
+    }
+
+    /* left pels: usable when the left MB is available and, if constrained intra
+     * pred is enabled, it is intra coded with base mode flag not set */
+    u1_mb_a =
+        ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
+         (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
+                                       !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
+                                    : 1));
+
+    /* top pels: same rule applied to the top MB */
+    u1_mb_b =
+        ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
+         (u4_constrained_intra_pred ? (ps_top_mb->u1_is_intra && !ps_top_mb->u1_base_mode_flag)
+                                    : 1));
+
+    /* topleft pels: same rule; ps_top_left_mb is only dereferenced when the top
+     * left MB is flagged available */
+    u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
+               (u4_constrained_intra_pred
+                    ? (ps_top_left_mb->u1_is_intra && !ps_top_left_mb->u1_base_mode_flag)
+                    : 1));
+
+    /* top right pels: same rule applied to the top right MB */
+    u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
+               (u4_constrained_intra_pred
+                    ? (ps_top_right_mb->u1_is_intra && !ps_top_right_mb->u1_base_mode_flag)
+                    : 1));
+
+    i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3); /* bit 0 left,
+                                                                                   * bit 1 top left,
+                                                                                   * bit 2 top,
+                                                                                   * bit 3 top right */
+    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); /* one
+                                                 * availability byte per 4x4 sub-block, looked up
+                                                 * from the MB level flags */
+
+    for(b8 = 0; b8 < 4; b8++) /* 8x8 blocks; b8 bit 0 selects the column, bit 1 the row */
+    {
+        u4_blk_x = (b8 & 0x01) << 3;
+        u4_blk_y = (b8 >> 1) << 3;
+        for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_quant_coeffs += MB_SIZE) /* one nnz byte and
+                                                 * MB_SIZE coefficients per 4x4 sub-block */
+        {
+            u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
+            u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
+
+            pu1_ref_mb_intra_4x4 =
+                ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
+            pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
+                          u4_pix_x + (u4_pix_y * i4_src_strd);
+            pu1_pred_mb = ps_proc->pu1_pred_mb + u4_pix_x + (u4_pix_y * i4_pred_strd);
+            if(u4_pix_x == 0) /* left neighbour lies outside this MB: use the recon picture */
+            {
+                i4_ref_strd_left = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
+                pu1_mb_ref_left =
+                    ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x +
+                    (u4_pix_y * i4_ref_strd_left);
+            }
+            else /* left neighbour is a sub-block of this MB: use its local recon */
+            {
+                i4_ref_strd_left = i4_pred_strd;
+                pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
+            }
+            if(u4_pix_y == 0) /* top neighbour lies outside this MB: use the recon picture */
+            {
+                i4_ref_strd_top = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
+                pu1_mb_ref_top =
+                    ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x +
+                    (u4_pix_y * i4_ref_strd_top);
+            }
+            else /* top neighbour is a sub-block of this MB: use its local recon */
+            {
+                i4_ref_strd_top = i4_pred_strd;
+                pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
+            }
+
+            pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left neighbour column */
+            pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top neighbour row */
+            pu1_mb_c = pu1_mb_b + 4; /* pointer to top right neighbour row */
+            if(u4_pix_y == 0)
+                pu1_mb_d = pu1_mb_b - 1;
+            else
+                pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left neighbour pel */
+
+            /* locating neighbors that are available for prediction */
+            /* TODO : update the neighbor availability information basing on
+             * constrained intra pred information.
+             * NOTE(review): the MB level flags computed before the loop already
+             * take constrained intra pred into account; confirm whether this
+             * TODO is still open */
+            /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
+             * split in to distinct routines */
+            /* basing on neighbors available and hence evade the computation of
+             * neighbor availability totally. */
+
+            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+            s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
+            s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
+            s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
+            s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
+            /* set valid intra modes for evaluation (top right bit is masked out of
+             * the table index) */
+            u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
+
+            /* if top partition is available and top right is not available for intra
+             * prediction, then */
+            /* pad top right samples using top sample and make top right also
+             * available */
+            /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
+             * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b |
+             * s_ngbr_avbl.u1_mb_c) << 3); */
+
+            /* gather prediction pels from the neighbors into pu1_ngbr_pels_i4:
+             * [0..3] left column (entry 3 is the row 0 neighbour), [4] top left,
+             * [5..8] top row, [9..12] top right row; unavailable left, top and top
+             * left entries are zeroed */
+            if(s_ngbr_avbl.u1_mb_a)
+            {
+                for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_ref_strd_left];
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4, 0, 4);
+            }
+            if(s_ngbr_avbl.u1_mb_b)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
+            }
+            if(s_ngbr_avbl.u1_mb_d)
+                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+            else
+                pu1_ngbr_pels_i4[4] = 0;
+            if(s_ngbr_avbl.u1_mb_c)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
+            }
+            else if(s_ngbr_avbl.u1_mb_b)
+            {
+                memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); /* replicate last top pel */
+                s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
+            }
+
+            i4_partition_cost_least = INT_MAX;
+
+            /* predict the intra 4x4 mode for the current partition (for evaluating
+             * cost): DC when left or top is unavailable, otherwise the smaller of
+             * the left and top neighbour modes. A neighbour inside this MB uses the
+             * mode already chosen for it; a neighbour in the left / top MB uses that
+             * MB's stored mode when it is I4x4 or I8x8 and DC otherwise */
+            if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+            {
+                u4_estimated_intra_4x4_mode = DC_I4x4;
+            }
+            else
+            {
+                UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
+                UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
+
+                if(u4_pix_x == 0)
+                {
+                    if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
+                    {
+                        u4_left_intra_4x4_mode =
+                            ps_proc->s_nbr_info.ps_left_mb_intra_modes
+                                ->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]];
+                    }
+                    else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
+                    {
+                        u4_left_intra_4x4_mode =
+                            ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
+                    }
+                }
+                else
+                {
+                    u4_left_intra_4x4_mode =
+                        ps_proc->au1_intra_luma_mb_4x4_modes
+                            [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]];
+                }
+
+                if(u4_pix_y == 0)
+                {
+                    if(ps_top_mb->u2_mb_type == I4x4)
+                    {
+                        u4_top_intra_4x4_mode =
+                            pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]];
+                    }
+                    else if(ps_top_mb->u2_mb_type == I8x8)
+                    {
+                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
+                    }
+                }
+                else
+                {
+                    u4_top_intra_4x4_mode =
+                        ps_proc->au1_intra_luma_mb_4x4_modes
+                            [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]];
+                }
+
+                u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
+            }
+
+            ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] =
+                u4_estimated_intra_4x4_mode;
+
+            /* mode evaluation and prediction; the evaluator returns the best mode
+             * and its cost through the two pointer arguments */
+            ps_codec->pf_ih264e_evaluate_intra_4x4_modes(
+                pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl,
+                &u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda,
+                u4_estimated_intra_4x4_mode);
+
+            i4_partition_distortion_least =
+                i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)
+                                               ? u4_cost_one_bit
+                                               : u4_cost_four_bits); /* cost minus the mode
+                                                                      * signalling term */
+
+            DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least,
+                  u4_best_intra_4x4_mode);
+
+            /* macroblock distortion */
+            i4_total_distortion += i4_partition_distortion_least;
+            i4_total_cost += i4_partition_cost_least;
+
+            /* mb partition mode */
+            ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
+
+            /********************************************************/
+            /*  error estimation,                                   */
+            /*  transform                                           */
+            /*  quantization                                        */
+            /********************************************************/
+            s_src.pv_data = pu1_mb_curr;
+            s_src.i4_data_stride = i4_src_strd;
+
+            s_pred.pv_data = pu1_pred_mb;
+            s_pred.i4_data_stride = i4_pred_strd;
+
+            s_quant_coeffs.pv_data = pi2_quant_coeffs;
+            s_quant_coeffs.i4_data_stride = 4;
+
+            ps_enc_loop_fxns->apf_resi_trans_quant_4x4[u1_resi_trans_fxn_idx](
+                &s_src, &s_pred, &s_quant_coeffs, &s_res_pred,
+                /* No op stride, this implies a buff of length 1x16 */
+                &s_resi_trans_quant_constants, pu1_nnz, &i2_dc_dummy, 0);
+
+            /********************************************************/
+            /*  ierror estimation,                                  */
+            /*  itransform                                          */
+            /*  iquantization                                       */
+            /********************************************************/
+
+            /* Tx blk coeffs are stored blk by blk */
+            /* Hence, in order to access rows of each Tx blk, one needs to stride of
+             * TxxSize.
+             * NOTE(review): the stride already holds 4 from the assignment before
+             * the forward transform call, so the assignment below repeats it */
+            s_quant_coeffs.i4_data_stride = 4;
+
+            s_recon.pv_data = pu1_ref_mb_intra_4x4;
+            s_recon.i4_data_stride = i4_pred_strd;
+
+            ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[u1_iq_it_recon_fxn_idx](
+                &s_quant_coeffs, &s_pred, &s_res_pred, &s_res_pred, &s_recon,
+                &s_iq_it_res_rec_constants, ps_proc->pv_scratch_buff, s_quant_coeffs.pv_data, 0, 0);
+        }
+    }
+
+    /* update the type of the mb if necessary */
+    if(i4_total_cost < ps_proc->i4_mb_cost)
+    {
+        ps_proc->i4_mb_cost = i4_total_cost;
+        ps_proc->i4_mb_distortion = i4_total_distortion;
+        ps_proc->ps_mb_info->u2_mb_type = I4x4;
+    }
+}
+
+/**
+****************************************************************************** +* +* @brief +* evaluate best chroma intra 8x8 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible chroma intra 8x8 modes and finds +* the mode that best represents the macroblock (least distortion) and occupies +* fewer bits in the bitstream. +* +* @param[in] ps_proc_ctxt +* pointer to macroblock context (handle) +* +* @remarks +* For chroma best intra pred mode is calculated based only on SAD +* +* @returns none +* +****************************************************************************** +*/ + +void isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc) +{ + /* Codec Context */ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns; + + /* SAD(distortion metric) of an 8x8 block */ + WORD32 i4_mb_distortion, i4_chroma_mb_distortion; + + /* intra mode */ + UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8; + + /* neighbor pels for intra prediction */ + UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels; + + /* pointer to curr macro block */ + UWORD8 *pu1_curr_mb = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data); + UWORD8 *pu1_ref_mb = ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data); + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma; + UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane; + + /* strides */ + WORD32 i4_src_strd_c = ps_proc->s_src_buf_props.as_component_bufs[1].i4_data_stride; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + WORD32 i4_rec_strd_c = ps_proc->s_rec_buf_props.as_component_bufs[1].i4_data_stride; + + /* neighbors left, top, top left */ + UWORD8 *pu1_mb_a = pu1_ref_mb - 2; + UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c; + UWORD8 
*pu1_mb_d = pu1_mb_b - 2; + + /* neighbor availability */ + const UWORD8 u1_valid_intra_modes[8] = {1, 3, 1, 3, 5, 7, 5, 15}; + WORD32 i4_ngbr_avbl; + + /* valid intra modes map */ + UWORD32 u4_valid_intra_modes; + isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; + + /* temp var */ + UWORD8 i; + UWORD32 u4_constrained_intra_pred = + ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id]; + UWORD8 u1_mb_a, u1_mb_b, u1_mb_d; + /* locating neighbors that are available for prediction */ + + /* gather prediction pels from the neighbors */ + /* left pels */ + u1_mb_a = + ((ps_proc->ps_ngbr_avbl->u1_mb_a) && + (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra && + !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag) + : 1)); + if(u1_mb_a) + { + for(i = 0; i < 16; i += 2) + { + pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c]; + pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1]; + } + } + else + { + ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE); + } + + /* top pels */ + u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) && + (u4_constrained_intra_pred + ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag) + : 1)); + if(u1_mb_b) + { + ps_mem_fxns->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16); + } + else + { + ps_mem_fxns->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE); + } + + /* top left pels */ + u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) && + (u4_constrained_intra_pred ? 
(ps_top_mb_syn_ele[-1].u1_is_intra && + !ps_top_mb_syn_ele[-1].u1_base_mode_flag) + : 1)); + if(u1_mb_d) + { + pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d; + pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1); + } + i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1); + ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl; + + u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; + + if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST || + ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) + u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8); + + i4_chroma_mb_distortion = INT_MAX; + + /* perform intra mode chroma 8x8 evaluation */ + /* intra prediction */ + ps_codec->pf_ih264e_evaluate_intra_chroma_modes( + pu1_curr_mb, pu1_ngbr_pels_c_i8x8, pu1_pred_mb, i4_src_strd_c, i4_pred_strd, i4_ngbr_avbl, + &u4_best_chroma_intra_8x8_mode, &i4_chroma_mb_distortion, u4_valid_intra_modes); + + if(u4_valid_intra_modes & 8) /* if Chroma PLANE is valid*/ + { + (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0, + i4_pred_strd, i4_ngbr_avbl); + + /* evaluate distortion(sad) */ + ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd, + i4_chroma_mb_distortion, &i4_mb_distortion); + + /* update the least distortion information if necessary */ + if(i4_mb_distortion < i4_chroma_mb_distortion) + { + i4_chroma_mb_distortion = i4_mb_distortion; + u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8; + } + } + + DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion, + u4_best_chroma_intra_8x8_mode); + + ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode; +} + +/** +****************************************************************************** +* +* @brief +* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the +* prediction. +* +* @par Description +* This function evaluates first three 16x16 modes and compute corresponding sad +* and return the buffer predicted with best mode. 
+* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels_i16 +* UWORD8 pointer to neighbouring pels +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels +* +* @param[in] u4_intra_mode +* Pointer to the variable in which best mode is returned +* +* @param[in] pu4_sadmin +* Pointer to the variable in which minimum sad is returned +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid +* +* @returns none +* +****************************************************************************** +*/ +void isvce_evaluate_intra16x16_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, UWORD8 *pu1_dst, + UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty, + UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes) +{ + UWORD8 *pu1_neighbour; + UWORD8 *pu1_src_temp = pu1_src; + UWORD8 left = 0, top = 0; + WORD32 u4_dcval = 0; + WORD32 i, j; + WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX; + UWORD8 val; + + left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); + top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; + + /* left available */ + if(left) + { + i4_sad_horz = 0; + + for(i = 0; i < 16; i++) + { + val = pu1_ngbr_pels_i16[15 - i]; + + u4_dcval += val; + + for(j = 0; j < 16; j++) + { + i4_sad_horz += ABS(val - pu1_src_temp[j]); + } + + pu1_src_temp += src_strd; + } + u4_dcval += 8; + } + + pu1_src_temp = pu1_src; + /* top available */ + if(top) + { + i4_sad_vert = 0; + + for(i = 0; i < 16; i++) + { + u4_dcval += pu1_ngbr_pels_i16[17 + i]; + + for(j = 0; j < 16; j++) + { + i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]); + } + pu1_src_temp += src_strd; + } + u4_dcval += 8; + } + + u4_dcval = (u4_dcval) >> (3 + left + top); + + pu1_src_temp = pu1_src; + + /* none available */ + 
u4_dcval += (left == 0) * (top == 0) * 128; + + i4_sad_dc = 0; + + for(i = 0; i < 16; i++) + { + for(j = 0; j < 16; j++) + { + i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]); + } + pu1_src_temp += src_strd; + } + + if((u4_valid_intra_modes & 04) == 0) /* If DC is disabled */ + i4_sad_dc = INT_MAX; + + if((u4_valid_intra_modes & 01) == 0) /* If VERT is disabled */ + i4_sad_vert = INT_MAX; + + if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled */ + i4_sad_horz = INT_MAX; + + i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); + + /* Finding Minimum sad and doing corresponding prediction */ + if(i4_min_sad < *pu4_sadmin) + { + *pu4_sadmin = i4_min_sad; + if(i4_min_sad == i4_sad_vert) + { + *u4_intra_mode = VERT_I16x16; + pu1_neighbour = pu1_ngbr_pels_i16 + 17; + for(j = 0; j < 16; j++) + { + memcpy(pu1_dst, pu1_neighbour, MB_SIZE); + pu1_dst += dst_strd; + } + } + else if(i4_min_sad == i4_sad_horz) + { + *u4_intra_mode = HORZ_I16x16; + for(j = 0; j < 16; j++) + { + val = pu1_ngbr_pels_i16[15 - j]; + memset(pu1_dst, val, MB_SIZE); + pu1_dst += dst_strd; + } + } + else + { + *u4_intra_mode = DC_I16x16; + for(j = 0; j < 16; j++) + { + memset(pu1_dst, u4_dcval, MB_SIZE); + pu1_dst += dst_strd; + } + } + } +} + +/** +****************************************************************************** +* +* @brief +* Evaluate best intra 4x4 mode and perform prediction. +* +* @par Description +* This function evaluates 4x4 modes and compute corresponding sad +* and return the buffer predicted with best mode. 
+* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels +* UWORD8 pointer to neighbouring pels +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels +* +* @param[in] u4_intra_mode +* Pointer to the variable in which best mode is returned +* +* @param[in] pu4_sadmin +* Pointer to the variable in which minimum cost is returned +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid +* +* @param[in] u4_lambda +* Lamda value for computing cost from SAD +* +* @param[in] u4_predictd_mode +* Predicted mode for cost computation +* +* @returns none +* +****************************************************************************** +*/ +void isvce_evaluate_intra_4x4_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst, + UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty, + UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes, UWORD32 u4_lambda, + UWORD32 u4_predictd_mode) +{ /* Summary: for every mode enabled in u4_valid_intra_modes (bit 0 = VERT, bit 1 = HORZ, bit 2 = DC, bit 3 = DIAG_DL, bit 4 = DIAG_DR, bit 5 = VERT_R, bit 6 = HORZ_D, bit 7 = VERT_L, bit 8 = HORZ_U, as tested below) a SAD against the 4x4 source block is accumulated (USADA8 is defined elsewhere; presumably a 4-pel SAD accumulate - TODO confirm) and turned into cost = SAD + u4_lambda when the mode equals u4_predictd_mode, else SAD + 4 * u4_lambda. The least cost is written to *pu4_sadmin, the winning mode to *u4_intra_mode and its 4x4 prediction to pu1_dst, so both pointers act as outputs even though the header comment tags them [in]. Neighbour layout as read by this function: pu1_ngbr_pels[0..3] are the left pels ordered bottom to top and [5..8] the top pels; [4] is presumably the top-left pel and [9..12] the top-right pels - TODO confirm against the caller. */ + UWORD8 *pu1_src_temp = pu1_src; + UWORD8 *pu1_pred = pu1_ngbr_pels; + UWORD8 left = 0, top = 0; + UWORD8 u1_pred_val = 0; + UWORD8 u1_pred_vals[4] = {0}; + UWORD8 *pu1_pred_val = NULL; + /* To store FILT121 operated values*/ + UWORD8 u1_pred_vals_diag_121[15] = {0}; + /* To store FILT11 operated values*/ + UWORD8 u1_pred_vals_diag_11[15] = {0}; + UWORD8 u1_pred_vals_vert_r[8] = {0}; + UWORD8 u1_pred_vals_horz_d[10] = {0}; + UWORD8 u1_pred_vals_horz_u[10] = {0}; + WORD32 u4_dcval = 0; + WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, + INT_MAX, INT_MAX, INT_MAX, INT_MAX}; + + WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, + INT_MAX, INT_MAX, INT_MAX, INT_MAX}; + WORD32 i, i4_min_cost = INT_MAX; + + left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); + top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 
2; /* left and top are added to the DC shift count below; presumably the masks select bit 0 and bit 2 of u4_n_avblty so both end up 0 or 1 - TODO confirm against the mask definitions */ + + /* Computing SAD and cost for every mode enabled in u4_valid_intra_modes */ + + /* VERT mode valid (bit 0): all four rows are compared against the top pels at pu1_ngbr_pels + 5 */ + if(u4_valid_intra_modes & 1) + { + pu1_pred = pu1_ngbr_pels + 5; + i4_sad[VERT_I4x4] = 0; + i4_cost[VERT_I4x4] = 0; + + USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); + + i4_cost[VERT_I4x4] = + i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? u4_lambda : 4 * u4_lambda); + } + + /* HORZ mode valid (bit 1): source row r is compared against left pel pu1_ngbr_pels[3 - r] */ + if(u4_valid_intra_modes & 2) + { + i4_sad[HORZ_I4x4] = 0; + i4_cost[HORZ_I4x4] = 0; + pu1_src_temp = pu1_src; + + u1_pred_val = pu1_ngbr_pels[3]; + + i4_sad[HORZ_I4x4] += + ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) + + ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val); + pu1_src_temp += src_strd; + + u1_pred_val = pu1_ngbr_pels[2]; + + i4_sad[HORZ_I4x4] += + ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) + + ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val); + pu1_src_temp += src_strd; + + u1_pred_val = pu1_ngbr_pels[1]; + + i4_sad[HORZ_I4x4] += + ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) + + ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val); + pu1_src_temp += src_strd; + + u1_pred_val = pu1_ngbr_pels[0]; + + i4_sad[HORZ_I4x4] += + ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) + + ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val); + + i4_cost[HORZ_I4x4] = + i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? 
u4_lambda : 4 * u4_lambda); + } + + /* DC mode valid (bit 2) */ + if(u4_valid_intra_modes & 4) + { + i4_sad[DC_I4x4] = 0; + i4_cost[DC_I4x4] = 0; + pu1_src_temp = pu1_src; + + if(left) + u4_dcval = + pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2] + pu1_ngbr_pels[3] + 2; + if(top) + u4_dcval += + pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7] + pu1_ngbr_pels[8] + 2; + + u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128; + + /* u4_dcval is 128 here only when neither left nor top contributed: the +2 rounding terms keep any real sum non-zero */ + memset(u1_pred_vals, u4_dcval, 4); + USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); + pu1_src_temp += src_strd; + + i4_cost[DC_I4x4] = + i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? u4_lambda : 4 * u4_lambda); + } + + /* if modes other than VERT, HORZ and DC are valid */ + if(u4_valid_intra_modes > 7) + { + pu1_pred = pu1_ngbr_pels; + pu1_pred[13] = pu1_pred[14] = pu1_pred[12]; /* NOTE: pu1_pred aliases pu1_ngbr_pels, so this writes entries 13 and 14 of the caller's neighbour buffer; the filter loop below reads up to index 14 */ + + /* Performing FILT121 and FILT11 operation for all neighbour values*/ + for(i = 0; i < 13; i++) + { + u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]); + u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]); + + pu1_pred++; + } + + if(u4_valid_intra_modes & 8) /* DIAG_DL */ + { + i4_sad[DIAG_DL_I4x4] = 0; + i4_cost[DIAG_DL_I4x4] = 0; + pu1_src_temp = pu1_src; + pu1_pred_val = u1_pred_vals_diag_121 + 5; + + USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]); + pu1_src_temp += src_strd; + i4_cost[DIAG_DL_I4x4] = + i4_sad[DIAG_DL_I4x4] + + ((u4_predictd_mode == 
DIAG_DL_I4x4) ? u4_lambda : 4 * u4_lambda); + } + + if(u4_valid_intra_modes & 16) /* DIAG_DR */ + { + i4_sad[DIAG_DR_I4x4] = 0; + i4_cost[DIAG_DR_I4x4] = 0; + pu1_src_temp = pu1_src; + pu1_pred_val = u1_pred_vals_diag_121 + 3; + + USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]); + pu1_src_temp += src_strd; + i4_cost[DIAG_DR_I4x4] = + i4_sad[DIAG_DR_I4x4] + + ((u4_predictd_mode == DIAG_DR_I4x4) ? u4_lambda : 4 * u4_lambda); + } + + if(u4_valid_intra_modes & 32) /* VERT_R mode valid */ + { + i4_sad[VERT_R_I4x4] = 0; + + pu1_src_temp = pu1_src; + u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2]; + memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3); + u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1]; + memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3); + + pu1_pred_val = u1_pred_vals_diag_11 + 4; + USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); + pu1_pred_val = u1_pred_vals_diag_121 + 3; + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4), i4_sad[VERT_R_I4x4]); + + i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + + ((u4_predictd_mode == VERT_R_I4x4) ? 
u4_lambda : 4 * u4_lambda); + } + + if(u4_valid_intra_modes & 64) /* HORZ_D mode valid */ + { + i4_sad[HORZ_D_I4x4] = 0; + + pu1_src_temp = pu1_src; + u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3]; + memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3); + u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0]; + u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0]; + u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1]; + u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1]; + u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2]; + u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2]; + + pu1_pred_val = u1_pred_vals_horz_d; + USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]); + + i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + + ((u4_predictd_mode == HORZ_D_I4x4) ? u4_lambda : 4 * u4_lambda); + } + + if(u4_valid_intra_modes & 128) /* VERT_L mode valid */ + { + i4_sad[VERT_L_I4x4] = 0; + pu1_src_temp = pu1_src; + pu1_pred_val = u1_pred_vals_diag_11 + 5; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); + pu1_src_temp += src_strd; + pu1_pred_val = u1_pred_vals_diag_121 + 5; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); + pu1_src_temp += src_strd; + pu1_pred_val = u1_pred_vals_diag_11 + 6; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); + pu1_src_temp += src_strd; + pu1_pred_val = u1_pred_vals_diag_121 + 6; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); + + i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + + ((u4_predictd_mode == VERT_L_I4x4) ? 
u4_lambda : 4 * u4_lambda); + } + + if(u4_valid_intra_modes & 256) /* HORZ_U mode valid */ + { + i4_sad[HORZ_U_I4x4] = 0; + pu1_src_temp = pu1_src; + u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2]; + u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1]; + u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1]; + u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0]; + u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0]; + u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]); + + memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4); + + pu1_pred_val = u1_pred_vals_horz_u; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]); + + i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + + ((u4_predictd_mode == HORZ_U_I4x4) ? 
u4_lambda : 4 * u4_lambda); + } + + i4_min_cost = + MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]), MIN3(i4_cost[3], i4_cost[4], i4_cost[5]), + MIN3(i4_cost[6], i4_cost[7], i4_cost[8])); + } + else + { + /* Only first three modes valid */ + i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]); + } + + *pu4_sadmin = i4_min_cost; + /* Write the prediction of the first mode, in index order 0..8, whose cost equals the minimum; ties therefore go to the lowest mode index */ + if(i4_min_cost == i4_cost[0]) + { + *u4_intra_mode = VERT_I4x4; + pu1_pred_val = pu1_ngbr_pels + 5; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val), 4); + } + else if(i4_min_cost == i4_cost[1]) + { + *u4_intra_mode = HORZ_I4x4; + memset(pu1_dst, pu1_ngbr_pels[3], 4); + pu1_dst += dst_strd; + memset(pu1_dst, pu1_ngbr_pels[2], 4); + pu1_dst += dst_strd; + memset(pu1_dst, pu1_ngbr_pels[1], 4); + pu1_dst += dst_strd; + memset(pu1_dst, pu1_ngbr_pels[0], 4); + } + else if(i4_min_cost == i4_cost[2]) + { + *u4_intra_mode = DC_I4x4; + memset(pu1_dst, u4_dcval, 4); + pu1_dst += dst_strd; + memset(pu1_dst, u4_dcval, 4); + pu1_dst += dst_strd; + memset(pu1_dst, u4_dcval, 4); + pu1_dst += dst_strd; + memset(pu1_dst, u4_dcval, 4); + } + + else if(i4_min_cost == i4_cost[3]) + { + *u4_intra_mode = DIAG_DL_I4x4; + pu1_pred_val = u1_pred_vals_diag_121 + 5; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 1), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 2), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 3), 4); + } + else if(i4_min_cost == i4_cost[4]) + { + *u4_intra_mode = DIAG_DR_I4x4; + pu1_pred_val = u1_pred_vals_diag_121 + 3; + + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val - 1), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val - 2), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val - 3), 4); + } + + else if(i4_min_cost == i4_cost[5]) + { + 
*u4_intra_mode = VERT_R_I4x4; + pu1_pred_val = u1_pred_vals_diag_11 + 4; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + pu1_pred_val = u1_pred_vals_diag_121 + 3; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (u1_pred_vals_vert_r), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4); + } + else if(i4_min_cost == i4_cost[6]) + { + *u4_intra_mode = HORZ_D_I4x4; + pu1_pred_val = u1_pred_vals_horz_d; + memcpy(pu1_dst, (pu1_pred_val + 6), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 4), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 2), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + } + else if(i4_min_cost == i4_cost[7]) + { + *u4_intra_mode = VERT_L_I4x4; + pu1_pred_val = u1_pred_vals_diag_11 + 5; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + pu1_pred_val = u1_pred_vals_diag_121 + 5; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + pu1_pred_val = u1_pred_vals_diag_11 + 6; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + pu1_pred_val = u1_pred_vals_diag_121 + 6; + memcpy(pu1_dst, (pu1_pred_val), 4); + } + else if(i4_min_cost == i4_cost[8]) + { + *u4_intra_mode = HORZ_U_I4x4; + pu1_pred_val = u1_pred_vals_horz_u; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 2), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 4), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 6), 4); + pu1_dst += dst_strd; + } + + return; +} + +/** +****************************************************************************** +* +* @brief: +* Evaluate best intr chroma mode (among VERT, HORZ and DC ) and do the +*prediction. +* +* @par Description +* This function evaluates first three intra chroma modes and compute +*corresponding sad and return the buffer predicted with best mode. 
+* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels +* UWORD8 pointer to neighbouring pels +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels +* +* @param[in] u4_intra_mode +* Pointer to the variable in which best mode is returned +* +* @param[in] pu4_sadmin +* Pointer to the variable in which minimum sad is returned +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid +* +* @return none +* +****************************************************************************** +*/ +void isvce_evaluate_intra_chroma_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst, + UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty, + UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes) +{ /* Summary: computes the SAD of the interleaved-UV source block (8 rows of 16 bytes) against the HORZ, VERT and DC chroma predictions, masks out the modes not enabled in u4_valid_intra_modes (bit 0 = DC, bit 1 = HORZ, bit 2 = VERT) and, only when the smallest SAD is below the value already held in *pu4_sadmin, updates *pu4_sadmin and *u4_intra_mode and writes the prediction to pu1_dst. *pu4_sadmin is therefore read as well as written. Neighbour layout as read below: pu1_ngbr_pels[0..15] hold the left U,V pairs ordered bottom to top and pu1_ngbr_pels[18..33] the top U,V pairs ordered left to right. */ + UWORD8 *pu1_neighbour; + UWORD8 *pu1_src_temp = pu1_src; + UWORD8 left = 0, top = 0; + WORD32 u4_dcval_u_l[2] = {0, 0}, /* sums of left neighbours for 'U' in two separate sets - + [0]: the four rows nearest the top, [1]: the four rows nearest the bottom */ + u4_dcval_u_t[2] = {0, 0}; /* sums of top neighbours for 'U': [0] left four columns, [1] right four columns */ + + WORD32 u4_dcval_v_l[2] = {0, 0}, /*sum left neighbours for 'V'*/ + u4_dcval_v_t[2] = {0, 0}; /*sum top neighbours for 'V'*/ + + WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, + i4_min_sad = INT_MAX; + UWORD8 val_u, val_v; + + WORD32 u4_dc_val[2][2][2]; /* Chroma can have four separate dc values, one per 4x4 + quadrant of the 8x8 block, laid out as 00 | 01 over 10 | 11. 
+ u4_dc_val[row][col][0] holds the 'U' value and + u4_dc_val[row][col][1] the 'V' value of quadrant (row, col). */ + left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); + top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; + + /* Evaluating HORZ and accumulating the left-neighbour DC sums */ + if(left) /* If left is available */ + { + i4_sad_horz = 0; + + for(i = 0; i < 8; i++) + { + val_v = pu1_ngbr_pels[15 - 2 * i]; + val_u = pu1_ngbr_pels[15 - 2 * i - 1]; + row = i / 4; + u4_dcval_u_l[row] += val_u; + u4_dcval_v_l[row] += val_v; + for(j = 0; j < 8; j++) + { + i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for HORZ mode*/ + i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]); + } + + pu1_src_temp += src_strd; + } /* rounding offsets for the right shifts applied when the DC values are formed below */ + u4_dcval_u_l[0] += 2; + u4_dcval_u_l[1] += 2; + u4_dcval_v_l[0] += 2; + u4_dcval_v_l[1] += 2; + } + + /* Evaluating VERT and accumulating the top-neighbour DC sums */ + pu1_src_temp = pu1_src; + if(top) /* If top is available */ + { + i4_sad_vert = 0; + /* i doubles as the source row for the SAD and as the top-pel pair index for the DC sums */ + for(i = 0; i < 8; i++) + { + col = i / 4; + + val_u = pu1_ngbr_pels[18 + i * 2]; + val_v = pu1_ngbr_pels[18 + i * 2 + 1]; + u4_dcval_u_t[col] += val_u; + u4_dcval_v_t[col] += val_v; + + for(j = 0; j < 16; j++) + { + i4_sad_vert += + ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]); /* Finding SAD for VERT mode*/ + } + pu1_src_temp += src_strd; + } /* rounding offsets, as for the left sums */ + u4_dcval_u_t[0] += 2; + u4_dcval_u_t[1] += 2; + u4_dcval_v_t[0] += 2; + u4_dcval_v_t[1] += 2; + } + + /* Computing the DC value of each quadrant */ + /* Equation 8-128 in spec*/ + u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top); + u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top); + u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top); + u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top); + + if(top) + { + /* Equation 8-132 in spec*/ + u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top); + u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top); + } + else + { + u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left); + u4_dc_val[0][1][1] = 
(u4_dcval_v_l[0]) >> (1 + left); + } + + if(left) + { + u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left); + u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left); + } + else + { + u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top); + u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top); + } + + if(!(left || top)) + { + /* Neither neighbour is available: every quadrant falls back to 128 */ + u4_dc_val[0][0][0] = u4_dc_val[0][0][1] = u4_dc_val[0][1][0] = u4_dc_val[0][1][1] = + u4_dc_val[1][0][0] = u4_dc_val[1][0][1] = u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128; + } + + /* Evaluating DC (computed even when DC is disabled; the result is discarded below in that case) */ + pu1_src_temp = pu1_src; + i4_sad_dc = 0; + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + col = j / 4; + row = i / 4; + val_u = u4_dc_val[row][col][0]; + val_v = u4_dc_val[row][col][1]; + + i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for DC mode*/ + i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]); + } + pu1_src_temp += src_strd; + } + /* The masks below are octal literals: 01, 02 and 04 select bits 0, 1 and 2 */ + if((u4_valid_intra_modes & 01) == 0) /* If DC is disabled*/ + i4_sad_dc = INT_MAX; + if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled*/ + i4_sad_horz = INT_MAX; + if((u4_valid_intra_modes & 04) == 0) /* If VERT is disabled*/ + i4_sad_vert = INT_MAX; + + i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); + + /* Finding the minimum SAD and doing the corresponding prediction; nothing is written unless it beats the SAD already stored in *pu4_sadmin. On equal SADs DC is preferred, then HORZ, then VERT */ + if(i4_min_sad < *pu4_sadmin) + { + *pu4_sadmin = i4_min_sad; + + if(i4_min_sad == i4_sad_dc) + { + *u4_intra_mode = DC_CH_I8x8; + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + col = j / 4; + row = i / 4; + + pu1_dst[2 * j] = u4_dc_val[row][col][0]; + pu1_dst[2 * j + 1] = u4_dc_val[row][col][1]; + } + pu1_dst += dst_strd; + } + } + else if(i4_min_sad == i4_sad_horz) + { + *u4_intra_mode = HORZ_CH_I8x8; + for(j = 0; j < 8; j++) + { + val_v = pu1_ngbr_pels[15 - 2 * j]; + val_u = pu1_ngbr_pels[15 - 2 * j - 1]; + + for(i = 0; i < 8; i++) + { + pu1_dst[2 * i] = val_u; + pu1_dst[2 * i + 1] = val_v; + } + pu1_dst += dst_strd; + } + } + else + { + *u4_intra_mode = VERT_CH_I8x8; + 
pu1_neighbour = pu1_ngbr_pels + 18; + for(j = 0; j < 8; j++) + { + memcpy(pu1_dst, pu1_neighbour, MB_SIZE); + pu1_dst += dst_strd; + } + } + } + + return; +} diff --git a/encoder/svc/isvce_intra_modes_eval.h b/encoder/svc/isvce_intra_modes_eval.h new file mode 100644 index 0000000..00ba756 --- /dev/null +++ b/encoder/svc/isvce_intra_modes_eval.h @@ -0,0 +1,361 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_intra_modes_eval.h +* +* @brief +* This file contains declarations of routines that perform rate distortion +* analysis on a macroblock if coded as intra. +* +* @author +* ittiam +* +* @remarks +* none +* +******************************************************************************* +*/ + +#ifndef _ISVCE_INTRA_MODES_EVAL_H_ +#define _ISVCE_INTRA_MODES_EVAL_H_ + +/** +****************************************************************************** +* +* @brief +* derivation process for subblock/partition availability +* +* @par Description +* Calculates the availability of the left, top, topright and topleft subblock +* or partitions. 
+* +* @param[in] ps_proc_ctxt +* pointer to macroblock context (handle) +* +* @param[in] i1_pel_pos_x +* column position of the pel wrt the current block +* +* @param[in] i1_pel_pos_y +* row position of the pel in wrt current block +* +* @remarks Assumptions: before calling this function it is assumed that +* the neighbor availability of the current macroblock is already derived. +* Based on table 6-3 of H264 specification +* +* @return availability status (yes or no) +* +****************************************************************************** +*/ +UWORD8 isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *s_ngbr_avbl, WORD8 i1_pel_pos_x, + WORD8 i1_pel_pos_y); + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 16x16 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible intra 16x16 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. +* +* @param[in] ps_proc_ctxt +* pointer to process context (handle) +* +* @remarks +* Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to header +* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits +* and residual bits fall in to texture bits the number of bits taken to encoding +* mbtype is considered as rate, we compute cost. Further we will approximate +* the distortion as the deviation b/w input and the predicted block as opposed +* to input and reconstructed block. +* +* NOTE: As per the Document JVT-O079, for intra 16x16 macroblock, +* the SAD and cost are one and the same. 
+* +* @return none +* +****************************************************************************** +*/ +void isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt); + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 8x8 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible intra 8x8 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. +* +* @param[in] ps_proc_ctxt +* pointer to proc ctxt +* +* @remarks Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to header +* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits +* and residual bits fall in to texture bits the number of bits taken to encoding +* mbtype is considered as rate, we compute cost. Further we will approximate +* the distortion as the deviation b/w input and the predicted block as opposed +* to input and reconstructed block. +* +* NOTE: TODO: This function needs to be tested +* +* @return none +* +****************************************************************************** +*/ +void isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt); + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 4x4 mode (rate distortion opt on) +* +* @par Description +* This function evaluates all the possible intra 4x4 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. 
+* +* @param[in] ps_proc_ctxt +* pointer to proc ctxt +* +* @remarks +* Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to header +* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits +* and residual bits fall in to texture bits the number of bits taken to encoding +* mbtype is considered as rate, we compute cost. Further we will approximate +* the distortion as the deviation b/w input and the predicted block as opposed +* to input and reconstructed block. +* +* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, +* 24*lambda is added to the SAD before comparison with the best SAD for +* inter prediction. This is an empirical value to prevent using too many intra +* blocks. +* +* @return none +* +****************************************************************************** +*/ +void isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t *ps_proc_ctxt); + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 4x4 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible intra 4x4 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. +* +* @param[in] ps_proc_ctxt +* pointer to proc ctxt +* +* @remarks +* Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. 
In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to header +* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits +* and residual bits fall in to texture bits the number of bits taken to encoding +* mbtype is considered as rate, we compute cost. Further we will approximate +* the distortion as the deviation b/w input and the predicted block as opposed +* to input and reconstructed block. +* +* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, +* 24*lambda is added to the SAD before comparison with the best SAD for +* inter prediction. This is an empirical value to prevent using too many intra +* blocks. +* +* @return none +* +****************************************************************************** +*/ +void isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt); + +/** +****************************************************************************** +* +* @brief +* evaluate best chroma intra 8x8 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible chroma intra 8x8 modes and finds +* the mode that best represents the macroblock (least distortion) and occupies +* fewer bits in the bitstream. +* +* @param[in] ps_proc_ctxt +* pointer to macroblock context (handle) +* +* @remarks +* For chroma best intra pred mode is calculated based only on SAD +* +* @returns none +* +****************************************************************************** +*/ +void isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff( + isvce_process_ctxt_t *ps_proc_ctxt); + +/** +****************************************************************************** +* +* @brief +* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the +* prediction. 
+* +* @par Description +* This function evaluates first three 16x16 modes and compute corresponding sad +* and return the buffer predicted with best mode. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels_i16 +* UWORD8 pointer to neighbouring pels +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels +* +* @param[out] u4_intra_mode +* Pointer to the variable in which best mode is returned +* +* @param[in,out] pu4_sadmin +* Pointer to the variable in which minimum sad is returned. The C chroma +* implementation of this type reads the stored value first and writes the +* mode, the sad and the prediction only when it finds a smaller sad +* (presumably the same holds for the other implementations - TODO confirm) +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid +* +* @returns none +* +****************************************************************************** +*/ +typedef void isvce_evaluate_intra_modes_ft(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, + UWORD8 *pu1_dst, UWORD32 src_strd, UWORD32 dst_strd, + WORD32 u4_n_avblty, UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, UWORD32 u4_valid_intra_modes); + +isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes; +isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes; + +/* assembly */ +isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_a9q; +isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_a9q; + +isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_av8; +isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_av8; + +/* x86 intrinsics */ +isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_ssse3; +isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_ssse3; + +/** +****************************************************************************** +* +* @brief +* Evaluate best intra 4x4 mode and perform prediction. 
+* +* @par Description +* This function evaluates 4x4 modes and compute corresponding sad +* and return the buffer predicted with best mode. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels +* UWORD8 pointer to neighbouring pels +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels +* +* @param[out] u4_intra_mode +* Pointer to the variable in which best mode is returned +* +* @param[out] pu4_sadmin +* Pointer to the variable in which minimum cost is returned +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid +* +* @param[in] u4_lambda +* Lambda value for computing cost from SAD +* +* @param[in] u4_predictd_mode +* Predicted mode for cost computation +* +* @returns none +* +****************************************************************************** +*/ +typedef void isvce_evaluate_intra_4x4_modes_ft(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, + UWORD8 *pu1_dst, UWORD32 src_strd, UWORD32 dst_strd, + WORD32 u4_n_avblty, UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, UWORD32 u4_valid_intra_modes, + UWORD32 u4_lambda, UWORD32 u4_predictd_mode); + +isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes; + +/* x86 intrinsics */ +isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_ssse3; + +/* assembly */ +isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_a9q; +isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_av8; + +#endif diff --git a/encoder/svc/isvce_mc.c b/encoder/svc/isvce_mc.c new file mode 100644 index 0000000..0710545 --- /dev/null +++ b/encoder/svc/isvce_mc.c @@ -0,0 +1,480 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** + ******************************************************************************* + * @file + * isvce_mc.c + * + * @brief + * Contains definition of functions for motion compensation + * + * @author + * ittiam + * + * @par List of Functions: + * - isvce_motion_comp_luma() + * - isvce_motion_comp_chroma() + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "isvc_defs.h" +#include "iv2.h" +#include "ive2.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "isvc_structs.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_cabac_tables.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include 
"isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "isvce_mc.h" +#include "ih264e_half_pel.h" +#include "isvce_ibl_eval.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * + * @brief + * performs motion compensation for a luma mb for the given mv. + * + * @par Description + * This routine performs motion compensation of an inter mb. When the inter + * mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer + * to pred buffer. In this case the function returns pointer and stride of the + * ref. buffer and this info is used in place of pred buffer else where. + * In other cases, the pred buffer is populated via copy / filtering + copy + * (q pel cases) and returned. + * + * @param[in] ps_proc + * pointer to current proc ctxt + * + * @return none + * + * @remarks Assumes half pel buffers for the entire frame are populated. 
+ *
+ * @param[out] ps_pred
+ *  receives the data pointer and stride of the luma prediction when the mb is
+ *  an intra BASE_MODE mb or has a single partition. For mbs with several
+ *  partitions the prediction is copied into ps_proc->pu1_pred_mb instead and
+ *  ps_pred is left untouched.
+ *
+ ******************************************************************************
+ */
+void isvce_motion_comp_luma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred)
+{
+    isvce_codec_t *ps_codec = ps_proc->ps_codec;
+    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_codec->s_isa_dependent_fxns.s_inter_pred_fxns;
+
+    /* Possible sources of the prediction samples                       */
+    /*   [0] : full pel position inside the reference picture           */
+    /*   [1] : ps_proc->pu1_best_subpel_buf (half pel and bi-pred case) */
+    UWORD8 *apu1_src[2];
+    WORD32 ai4_src_strd[2];
+
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    UWORD32 u4_part_idx;
+
+    /* Intra mb coded in BASE_MODE : nothing to motion compensate, forward */
+    /* the luma output buffer of the intra pred ctxt                       */
+    if((BASE_MODE == ps_proc->ps_mb_info->u2_mb_type) && ps_proc->ps_mb_info->u1_is_intra)
+    {
+        svc_intra_pred_ctxt_t *ps_ibl_ctxt = ps_proc->ps_intra_pred_ctxt;
+
+        ps_pred->pv_data =
+            (UWORD8 *) ps_ibl_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y].pv_data;
+        ps_pred->i4_data_stride =
+            ps_ibl_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y].i4_data_stride;
+
+        return;
+    }
+
+    /* NOTE(review): the full pel stride is always read from list 0, also for */
+    /* PRED_L1 partitions - confirm that both lists share the same stride     */
+    ai4_src_strd[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].i4_data_stride;
+
+    for(u4_part_idx = 0; u4_part_idx < ps_proc->u4_num_sub_partitions; u4_part_idx++)
+    {
+        isvce_enc_pu_t *ps_pu = &ps_proc->ps_mb_info->as_pu[u4_part_idx];
+
+        /* Only PRED_L1 reads list 1. PRED_L0, PRED_BI and any other mode use */
+        /* list 0 (for PRED_BI the list 0 data is only a placeholder)         */
+        WORD32 i4_list = (PRED_L1 == ps_pu->u1_pred_mode) ? 1 : 0;
+        WORD32 i4_is_bipred = (PRED_BI == ps_pu->u1_pred_mode);
+        mv_t *ps_mv = &ps_pu->as_me_info[i4_list].s_mv;
+
+        /* bit 1 of a quarter pel mv component is its half pel part */
+        WORD32 i4_has_hpel = (0 != ((ps_mv->i2_mvx | ps_mv->i2_mvy) & 0x2));
+
+        /* half pel and bi-pred partitions are served from the sub pel buffer */
+        UWORD32 u4_src_idx = (i4_has_hpel || i4_is_bipred);
+
+        /* full pel source : reference picture displaced by the integer mv */
+        apu1_src[0] = ps_proc->as_ref_buf_props[i4_list].as_component_bufs[0].pv_data;
+        apu1_src[0] += ((ps_mv->i2_mvy >> 2) * ai4_src_strd[0]) + (ps_mv->i2_mvx >> 2);
+
+        /* sub pel / bi-pred source */
+        apu1_src[1] = ps_proc->pu1_best_subpel_buf;
+        ai4_src_strd[1] = ps_proc->u4_bst_spel_buf_strd;
+
+        if(1 == ps_proc->u4_num_sub_partitions)
+        {
+            /* A single partition covers the mb : no copy is required, the */
+            /* source buffer itself is handed out as the prediction        */
+            ps_pred->pv_data = apu1_src[u4_src_idx];
+            ps_pred->i4_data_stride = ai4_src_strd[u4_src_idx];
+        }
+        else
+        {
+            /* Several partitions : gather each one in the pred mb buffer */
+            UWORD32 u4_wd = (ps_pu->u1_wd_in_4x4_m1 + 1) << 2;
+            UWORD32 u4_ht = (ps_pu->u1_ht_in_4x4_m1 + 1) << 2;
+            UWORD8 *pu1_dst = ps_proc->pu1_pred_mb + 4 * ps_pu->u1_pos_y_in_4x4 * i4_pred_strd +
+                              4 * ps_pu->u1_pos_x_in_4x4;
+
+            ps_inter_pred_fxns->pf_inter_pred_luma_copy(apu1_src[u4_src_idx], pu1_dst,
+                                                        ai4_src_strd[u4_src_idx], i4_pred_strd,
+                                                        u4_ht, u4_wd, NULL, 0);
+        }
+    }
+}
+
+/**
+ ******************************************************************************
+ *
+ * @brief
+ *  performs motion compensation for chroma mb
+ *
+ * @par Description
+ *  Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
+ *  according to the motion vectors given
+ *
+ * @param[in] ps_proc
+ *  pointer to current proc ctxt
+ *
+ * @return  none
+ *
+ * @remarks Assumes half pel and quarter pel buffers for the entire frame are
+ *  populated.
+ *
+ * @param[out] ps_pred
+ *  set to the chroma output buffer of the intra pred ctxt for intra BASE_MODE
+ *  mbs, and to ps_proc->pu1_pred_mb with stride ps_proc->i4_pred_strd
+ *  otherwise
+ *
+ ******************************************************************************
+ */
+void isvce_motion_comp_chroma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred)
+{
+    isvce_codec_t *ps_codec = ps_proc->ps_codec;
+    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_codec->s_isa_dependent_fxns.s_inter_pred_fxns;
+
+    /* pred buffer stride */
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* partition idx */
+    UWORD32 u4_part_idx;
+
+    ASSERT(ps_proc->u4_num_sub_partitions <= ENC_MAX_PU_IN_MB);
+
+    /* Intra mb coded in BASE_MODE : forward the chroma output buffer of the */
+    /* intra pred ctxt, nothing has to be interpolated                       */
+    if((ps_proc->ps_mb_info->u2_mb_type == BASE_MODE) && ps_proc->ps_mb_info->u1_is_intra)
+    {
+        svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt;
+
+        ps_pred->pv_data =
+            (UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[UV]
+                            .pv_data);
+        ps_pred->i4_data_stride =
+            ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[UV]
+                .i4_data_stride;
+
+        return;
+    }
+
+    ps_pred->pv_data = ps_proc->pu1_pred_mb;
+    ps_pred->i4_data_stride = ps_proc->i4_pred_strd;
+
+    for(u4_part_idx = 0; u4_part_idx < ps_proc->u4_num_sub_partitions; u4_part_idx++)
+    {
+        isvce_enc_pu_t *ps_curr_pu = ps_proc->ps_mb_info->as_pu + u4_part_idx;
+
+        /* A uni-pred partition interpolates the one list named by its pred  */
+        /* mode straight into the pred buffer. A bi-pred partition           */
+        /* interpolates lists 0 .. BI - 1 into temporary buffers which are   */
+        /* averaged afterwards.                                              */
+        WORD32 i4_is_bipred = (BI == ps_curr_pu->u1_pred_mode);
+        WORD32 i4_first_list = i4_is_bipred ? 0 : ps_curr_pu->u1_pred_mode;
+        WORD32 i4_num_lists = i4_is_bipred ? BI : 1;
+        WORD32 i4_list_cnt;
+
+        /* width and height of the chroma partition */
+        UWORD32 wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 1;
+        UWORD32 ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 1;
+
+        /* NOTE(review): luma scales both offsets by 4; the 4 / 2 scaling used */
+        /* here is kept unchanged - confirm it for partitions not located at   */
+        /* the mb origin                                                       */
+        UWORD8 *pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
+                           2 * ps_curr_pu->u1_pos_x_in_4x4;
+
+        /* Temporary buffers receiving the interpolated L0 and L1 samples */
+        UWORD8 *pu1_ref_buf[2];
+
+        pu1_ref_buf[L0] = ps_proc->apu1_subpel_buffs[0];
+        pu1_ref_buf[L1] = ps_proc->apu1_subpel_buffs[1];
+
+        for(i4_list_cnt = 0; i4_list_cnt < i4_num_lists; i4_list_cnt++)
+        {
+            WORD32 i4_list = i4_first_list + i4_list_cnt;
+            mv_t *ps_curr_mv = &ps_curr_pu->as_me_info[i4_list].s_mv;
+            UWORD8 *pu1_ref = ps_proc->as_ref_buf_props[i4_list].as_component_bufs[1].pv_data;
+            WORD32 i4_ref_strd =
+                ps_proc->as_ref_buf_props[i4_list].as_component_bufs[1].i4_data_stride;
+
+            /* integer part of the mv : 8 mv units make one chroma sample */
+            WORD32 i4_mv_x = ps_curr_mv->i2_mvx >> 3;
+            WORD32 i4_mv_y = ps_curr_mv->i2_mvy >> 3;
+
+            /* fractional part : the three lsbs of each mv component */
+            UWORD8 u1_dx = ps_curr_mv->i2_mvx & 0x7;
+            UWORD8 u1_dy = ps_curr_mv->i2_mvy & 0x7;
+
+            UWORD8 *pu1_dst = i4_is_bipred ? pu1_ref_buf[i4_list] : pu1_pred;
+            WORD32 i4_dst_strd = i4_is_bipred ? MB_SIZE : i4_pred_strd;
+
+            /* Move ref to the position given by the mv. The horizontal     */
+            /* step is doubled by multiplication : i4_mv_x may be negative  */
+            /* and left shifting a negative value is undefined behaviour    */
+            pu1_ref += ((i4_mv_y * i4_ref_strd) + (i4_mv_x * 2));
+
+            ps_inter_pred_fxns->pf_inter_pred_chroma(pu1_ref, pu1_dst, i4_ref_strd, i4_dst_strd,
+                                                     u1_dx, u1_dy, ht, wd);
+        }
+
+        if(i4_is_bipred)
+        {
+            /* average the L0 and L1 interpolations into the pred buffer */
+            ps_inter_pred_fxns->pf_inter_pred_luma_bilinear(pu1_ref_buf[L0], pu1_ref_buf[L1],
+                                                            pu1_pred, MB_SIZE, MB_SIZE,
+                                                            i4_pred_strd, MB_SIZE >> 1, MB_SIZE);
+        }
+    }
+}
diff --git a/encoder/svc/isvce_mc.h b/encoder/svc/isvce_mc.h
new file mode 100644
index 0000000..fd2fd71
--- /dev/null
+++ b/encoder/svc/isvce_mc.h
@@ -0,0 +1,87 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_mc.h
+*
+* @brief
+*  This file contains declarations of routines that perform motion compensation
+*  of luma and chroma macroblocks.
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  The declarations use isvce_process_ctxt_t and buffer_container_t; this file
+*  includes no other header, so those types have to be declared before it is
+*  included.
+*
+*******************************************************************************
+*/
+#ifndef _ISVCE_MC_H_
+#define _ISVCE_MC_H_
+
+/**
+******************************************************************************
+*
+* @brief
+*  performs motion compensation for a luma mb for the given mv.
+*
+* @par Description
+*  This routine performs motion compensation of an inter mb. When the inter
+*  mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
+*  to pred buffer. In this case the function returns pointer and stride of the
+*  ref. buffer and this info is used in place of pred buffer elsewhere.
+*  In other cases, the pred buffer is populated via copy / filtering + copy
+*  (q pel cases) and returned.
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
+* @param[out] ps_pred
+*  container through which the pointer and stride of the luma prediction are
+*  returned
+*
+* @return  none
+*
+* @remarks Assumes half pel buffers for the entire frame are populated.
+*
+******************************************************************************
+*/
+extern void isvce_motion_comp_luma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred);
+
+/**
+******************************************************************************
+*
+* @brief
+*  performs motion compensation for chroma mb
+*
+* @par Description
+*  Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
+*  according to the motion vectors given
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
+* @param[out] ps_pred
+*  container through which the pointer and stride of the chroma prediction
+*  are returned
+*
+* @return  none
+*
+* @remarks Assumes half pel and quarter pel buffers for the entire frame are
+*  populated.
+******************************************************************************
+*/
+extern void isvce_motion_comp_chroma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred);
+
+#endif
diff --git a/encoder/svc/isvce_me.c b/encoder/svc/isvce_me.c
new file mode 100644
index 0000000..3e7fec7
--- /dev/null
+++ b/encoder/svc/isvce_me.c
@@ -0,0 +1,2924 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** + ******************************************************************************* + * @file + * isvce_me.c + * + * @brief + * Contains definition of functions for motion estimation + * + * @author + * ittiam + * + * @par List of Functions: + * - isvce_init_mv_bits() + * - isvce_skip_analysis_chroma() + * - isvce_skip_analysis_luma() + * - isvce_analyse_skip() + * - isvce_get_search_candidates() + * - isvce_find_skip_motion_vector() + * - isvce_get_mv_predictor() + * - isvce_mv_pred() + * - isvce_mv_pred_me() + * - isvce_init_me() + * - isvce_compute_me() + * - isvce_compute_me_nmb() + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "isvc_macros.h" +#include "ih264_platform_macros.h" +#include "iv2.h" +#include "ive2.h" +#include "ithread.h" +#include "ih264_platform_macros.h" +#include "isvc_defs.h" +#include "ime_defs.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include 
"isvce_structs.h" +#include "isvce_globals.h" +#include "isvce_me.h" +#include "ime.h" +#include "ih264_debug.h" +#include "ih264e_intra_modes_eval.h" +#include "isvce_core_coding.h" +#include "isvce_mc.h" +#include "ih264e_debug.h" +#include "ih264e_half_pel.h" +#include "ime_statistics.h" +#include "ih264e_platform_macros.h" +#include "isvce_defs.h" +#include "isvce_structs.h" +#include "isvce_ilp_mv_utils.h" +#include "isvce_utils.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief Diamond Search +* +* @par Description: +* This function computes the sad at vertices of several layers of diamond grid +* at a time. The number of layers of diamond grid that would be evaluated is +* configurable.The function computes the sad at vertices of a diamond grid. If +* the sad at the center of the diamond grid is lesser than the sad at any other +* point of the diamond grid, the function marks the candidate Mb partition as +* mv. 
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. The mb partition ctxt of the selected reference list
+*  (as_mb_part[i4_reflist]) supplies the starting mv, cost and distortion and
+*  is updated when a cheaper position is found. u4_exit of that partition is
+*  set when the centre of the diamond turns out to be the cheapest point.
+*
+* @param[in] i4_reflist
+*  index of the reference list that is searched
+*
+* @returns none. The best mv (used as a full pel offset into the reference
+*  buffer) and its cost and distortion are written to the mb partition ctxt
+*
+* @remarks Diamond Srch, radius is 1
+*
+*******************************************************************************
+*/
+static void isvce_diamond_search_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
+{
+    /* The four diamond vertices in the order in which their SADs are read */
+    /* from the output of pf_ime_compute_sad4_diamond : x - 1, x + 1,      */
+    /* y - 1, y + 1                                                        */
+    static const WORD32 ai4_step_x[4] = {-1, 1, 0, 0};
+    static const WORD32 ai4_step_y[4] = {0, 0, -1, 1};
+
+    /* MB partition info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
+
+    /* lagrange parameter and mv bits table */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* source mb, reference buffer and their strides */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->ai4_rec_strd[i4_reflist];
+
+    /* mv predictor, subtracted from four times the candidate mv to index */
+    /* the mv bits table                                                  */
+    WORD32 i4_pred_x = ps_mb_part->s_mv_pred.i2_mvx;
+    WORD32 i4_pred_y = ps_mb_part->s_mv_pred.i2_mvy;
+
+    /* best cost and distortion found so far */
+    WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
+    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
+
+    /* centre of the diamond; starts at the mv with best sad during the */
+    /* initial evaluation                                               */
+    WORD16 i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
+    WORD16 i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
+
+    /* Diamond search iterations left */
+    WORD64 i8_layers_left;
+
+    for(i8_layers_left = ps_me_ctxt->u4_num_layers; i8_layers_left != 0; i8_layers_left--)
+    {
+        WORD32 ai4_sad[4];
+        WORD16 i2_best_x = i2_mvx;
+        WORD16 i2_best_y = i2_mvy;
+        WORD32 i;
+
+        /* FIXME : is this the right way to check for out of bounds ? */
+        if((i2_mvx - 1 < ps_me_ctxt->i4_srch_range_w) ||
+           (i2_mvx + 1 > ps_me_ctxt->i4_srch_range_e) ||
+           (i2_mvy - 1 < ps_me_ctxt->i4_srch_range_n) ||
+           (i2_mvy + 1 > ps_me_ctxt->i4_srch_range_s))
+        {
+            break;
+        }
+
+        ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd),
+                                                pu1_curr_mb, i4_ref_strd, i4_src_strd, ai4_sad);
+
+        for(i = 0; i < 4; i++)
+        {
+            WORD32 i4_cand_x = i2_mvx + ai4_step_x[i];
+            WORD32 i4_cand_y = i2_mvy + ai4_step_y[i];
+            WORD32 i4_cost;
+
+            DEBUG_SAD_HISTOGRAM_ADD(ai4_sad[i], 2);
+
+            /* cost = sad + lambda * bits. The candidate is scaled by        */
+            /* multiplication : it may be negative and left shifting a       */
+            /* negative value is undefined behaviour                         */
+            i4_cost = ai4_sad[i] +
+                      u4_lambda_motion * (pu1_mv_bits[(i4_cand_x * 4) - i4_pred_x] +
+                                          pu1_mv_bits[(i4_cand_y * 4) - i4_pred_y]);
+
+            if(i4_cost_least > i4_cost)
+            {
+                i4_cost_least = i4_cost;
+                i4_distortion_least = ai4_sad[i];
+
+                i2_best_x = i4_cand_x;
+                i2_best_y = i4_cand_y;
+            }
+        }
+
+        /* no vertex beats the centre : the search has converged */
+        if((i2_best_x == i2_mvx) && (i2_best_y == i2_mvy))
+        {
+            ps_mb_part->u4_exit = 1;
+            break;
+        }
+
+        /* recentre the diamond on the cheapest vertex */
+        i2_mvx = i2_best_x;
+        i2_mvy = i2_best_y;
+    }
+
+    if(i4_cost_least < ps_mb_part->i4_mb_cost)
+    {
+        ps_mb_part->i4_mb_cost = i4_cost_least;
+        ps_mb_part->i4_mb_distortion = i4_distortion_least;
+        ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
+        ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector among the tentative mv
+* candidates chosen.
+*
+* @par Description:
+*  Computes the 16x16 SAD at every distinct candidate of
+*  as_mv_init_search[i4_reflist] and adds the mv rate cost (u4_lambda_motion
+*  times the pu1_mv_bits entries of both mv components relative to the mv
+*  predictor). Candidates repeating an earlier entry of the list are skipped.
+*  The cheapest candidate is the position in the search window at which the
+*  motion estimation should begin in order to minimise the number of search
+*  iterations.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. as_mb_part[i4_reflist] receives the index, mv, cost
+*  and distortion of the cheapest candidate if it beats the cost already
+*  stored there.
+*
+* @param[in] i4_reflist
+*  index of the reference list whose candidates are evaluated
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+static void isvce_evaluate_init_srchposn_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
+{
+    /* mb partition info */
+    mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
+
+    /* candidate mvs and their count */
+    ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
+    UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
+
+    /* lagrange parameter, mv bits table, fast sad selector */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
+
+    /* source mb, reference buffer and their strides */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->ai4_rec_strd[i4_reflist];
+
+    /* SAD (distortion metric) of the 16x16 mb at the current candidate */
+    WORD32 i4_mb_distortion;
+
+    /* cost = distortion + u4_lambda_motion * rate */
+    WORD32 i4_mb_cost_least = INT_MAX;
+    WORD32 i4_distortion_least = INT_MAX;
+    WORD32 i4_srch_pos_idx = 0;
+
+    UWORD32 u4_cand;
+
+    /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
+    for(u4_cand = 0; u4_cand < u4_num_candidates; u4_cand++)
+    {
+        ime_mv_t *ps_mv = &ps_mv_list[u4_cand];
+        UWORD8 *pu1_ref;
+        WORD32 i4_mb_cost;
+        UWORD32 u4_prev;
+
+        /* look for an earlier candidate carrying the same mv */
+        for(u4_prev = 0; u4_prev < u4_cand; u4_prev++)
+        {
+            if((ps_mv->i2_mvx == ps_mv_list[u4_prev].i2_mvx) &&
+               (ps_mv->i2_mvy == ps_mv_list[u4_prev].i2_mvy))
+            {
+                break;
+            }
+        }
+
+        /* already evaluated */
+        if(u4_prev < u4_cand)
+        {
+            continue;
+        }
+
+        /* adjust ref pointer */
+        pu1_ref = pu1_ref_mb + ps_mv->i2_mvx + (ps_mv->i2_mvy * i4_ref_strd);
+
+        /* compute distortion; the best cost so far is handed to the sad routine */
+        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd,
+                                                                 i4_ref_strd, i4_mb_cost_least,
+                                                                 &i4_mb_distortion);
+
+        DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
+
+        /* compute cost. The mv is scaled by multiplication : it may be      */
+        /* negative and left shifting a negative value is undefined behaviour */
+        i4_mb_cost = i4_mb_distortion +
+                     u4_lambda_motion *
+                         (pu1_mv_bits[(ps_mv->i2_mvx * 4) - ps_mb_part->s_mv_pred.i2_mvx] +
+                          pu1_mv_bits[(ps_mv->i2_mvy * 4) - ps_mb_part->s_mv_pred.i2_mvy]);
+
+        if(i4_mb_cost < i4_mb_cost_least)
+        {
+            i4_mb_cost_least = i4_mb_cost;
+            i4_distortion_least = i4_mb_distortion;
+            i4_srch_pos_idx = u4_cand;
+        }
+    }
+
+    if(i4_mb_cost_least < ps_mb_part->i4_mb_cost)
+    {
+        ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
+        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
+        ps_mb_part->i4_mb_distortion = i4_distortion_least;
+        ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
+        ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching full pixel predictor within the search
+* range
+*
+* @par Description:
+*  This function begins by computing
the mv predict vector for the current mb.
+*  This is used for cost computations. Further basing on the algo. chosen, it
+*  looks through a set of candidate vectors that best represent the mb a least
+*  cost and returns this information.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. The four search range limits are tightened around
+*  the current mv of as_mb_part[i4_ref_list] before the search is run.
+*
+* @param[in] i4_ref_list
+*  index of the reference list that is searched
+*
+* @returns none. The search routine updates the mb partition ctxt in place
+*
+* @remarks only the DMND_SRCH speed preset is handled; any other preset trips
+*  an assert
+*
+*******************************************************************************
+*/
+static void isvce_full_pel_motion_estimation_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_ref_list)
+{
+    /* mb part info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
+
+    /* best initial candidate and the allowed excursion around it */
+    WORD32 i4_centre_x = ps_mb_part->s_mv_curr.i2_mvx;
+    WORD32 i4_centre_y = ps_mb_part->s_mv_curr.i2_mvy;
+    WORD32 i4_bound_x = ps_me_ctxt->ai2_srch_boundaries[0];
+    WORD32 i4_bound_y = ps_me_ctxt->ai2_srch_boundaries[1];
+
+    /******************************************************************/
+    /* Modify Search range about initial candidate instead of zero mv */
+    /******************************************************************/
+    /*
+     * FIXME: The motion vectors in a way can become unbounded. It may so happen
+     * that MV might exceed the limit of the profile configured.
+     */
+    ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w, i4_centre_x - i4_bound_x);
+    ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e, i4_centre_x + i4_bound_x);
+    ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n, i4_centre_y - i4_bound_y);
+    ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s, i4_centre_y + i4_bound_y);
+
+    /************************************************************/
+    /* Traverse about best initial candidate for mv             */
+    /************************************************************/
+    switch(ps_me_ctxt->u4_me_speed_preset)
+    {
+        case DMND_SRCH:
+            isvce_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
+            break;
+        default:
+            assert(0);
+            break;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching sub pixel predictor within the search
+* range
+*
+* @par Description:
+*  Evaluates the eight half pel positions surrounding the full pel part of
+*  the current mv of as_mb_part[i4_reflist]. The cost of a position is its
+*  16x16 SAD plus u4_lambda_motion times the pu1_mv_bits entries of both mv
+*  components relative to the mv predictor. The position with least cost
+*  replaces the current mv if it is cheaper than the cost already stored.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. apu1_subpel_buffs[] supply the half x, half y and
+*  half xy planes; whenever a position of a plane improves the running best
+*  cost, the entry of that plane is repointed to the samples of that position.
+*
+* @param[in] i4_reflist
+*  index of the reference list that is refined
+*
+* @returns none. mv, cost, distortion, best half pel buffer and search
+*  position index of the mb partition ctxt are updated on improvement
+*
+* @remarks the current mv is treated as being in quarter pel units
+*
+*******************************************************************************
+*/
+static void isvce_sub_pel_motion_estimation_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
+{
+    /* The eight half pel positions in the order of their entries in the  */
+    /* sad array : mv offset from the full pel mv in quarter pel units    */
+    /* and the plane that holds the samples (0 : half x, 1 : half y,      */
+    /* 2 : half xy)                                                       */
+    static const WORD32 ai4_off_x[8] = {2, -2, 0, 0, 2, -2, 2, -2};
+    static const WORD32 ai4_off_y[8] = {0, 0, 2, -2, 2, 2, -2, -2};
+    static const WORD32 ai4_plane[8] = {0, 0, 1, 1, 2, 2, 2, 2};
+
+    /* mb partitions info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
+
+    /* lagrange parameter and mv bits table */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
+
+    /* Full pel part of the current mv, back in quarter pel units. Scaled  */
+    /* by multiplication : the mv may be negative and left shifting a      */
+    /* negative value is undefined behaviour                               */
+    WORD32 i4_base_x = (ps_mb_part->s_mv_curr.i2_mvx >> 2) * 4;
+    WORD32 i4_base_y = (ps_mb_part->s_mv_curr.i2_mvy >> 2) * 4;
+
+    /* best cost, distortion, mv, buffer and search position so far */
+    WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
+    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
+    WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
+    WORD16 i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
+    WORD16 i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
+    UWORD8 *pu1_best_hpel_buf = NULL;
+
+    /* plane pointers handed to the sad routine, sads of the 8 positions */
+    UWORD8 *apu1_plane[3];
+    WORD32 ai4_sad[8];
+    WORD32 i4_cand;
+
+    /**************************************************************/
+    /* The sub pel buffers point to the half pel sample on the    */
+    /* left (half x), top (half y) and top left (half xy) of the  */
+    /* full pel, whereas pf_ime_sub_pel_compute_sad_16x16 expects */
+    /* the right, bottom and bottom right half pel samples.       */
+    /* Hence the adjustments below.                               */
+    /**************************************************************/
+    apu1_plane[0] = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
+    apu1_plane[1] = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
+    apu1_plane[2] = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
+
+    ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(ps_me_ctxt->pu1_src_buf_luma, apu1_plane[0],
+                                                 apu1_plane[1], apu1_plane[2], i4_src_strd,
+                                                 i4_ref_strd, ai4_sad);
+
+    for(i4_cand = 0; i4_cand < 8; i4_cand++)
+    {
+        WORD32 i4_plane = ai4_plane[i4_cand];
+        WORD32 i4_mv_x = i4_base_x + ai4_off_x[i4_cand];
+        WORD32 i4_mv_y = i4_base_y + ai4_off_y[i4_cand];
+
+        /* cost = distortion + u4_lambda_motion * rate */
+        WORD32 i4_mb_cost =
+            ai4_sad[i4_cand] +
+            u4_lambda_motion * (pu1_mv_bits[i4_mv_x - ps_mb_part->s_mv_pred.i2_mvx] +
+                                pu1_mv_bits[i4_mv_y - ps_mb_part->s_mv_pred.i2_mvy]);
+
+        if(i4_mb_cost < i4_mb_cost_least)
+        {
+            /* positions with a negative offset start one row above and / */
+            /* or one sample to the left of the adjusted plane pointer    */
+            UWORD8 *pu1_hpel_buf = apu1_plane[i4_plane] -
+                                   ((ai4_off_y[i4_cand] < 0) ? i4_ref_strd : 0) -
+                                   ((ai4_off_x[i4_cand] < 0) ? 1 : 0);
+
+            i4_mb_cost_least = i4_mb_cost;
+            i4_distortion_least = ai4_sad[i4_cand];
+
+            i2_mv_u_x = i4_mv_x;
+            i2_mv_u_y = i4_mv_y;
+
+            ps_me_ctxt->apu1_subpel_buffs[i4_plane] = pu1_hpel_buf;
+            pu1_best_hpel_buf = pu1_hpel_buf;
+
+            i4_srch_pos_idx = i4_plane;
+        }
+    }
+
+    if(i4_mb_cost_least < ps_mb_part->i4_mb_cost)
+    {
+        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
+        ps_mb_part->i4_mb_distortion = i4_distortion_least;
+        ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
+        ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
+        ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
+        ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function computes cost of skip macroblocks
+*
+* @par Description:
+*
+* @param[in] ps_me_ctxt
+*
pointer to me ctxt +* +* +* @returns none +* +* @remarks +* NOTE: while computing the skip cost, do not enable early exit from compute +* sad function because, a negative bias gets added later +* Note tha the last ME candidate in me ctxt is taken as skip motion vector +* +******************************************************************************* +*/ +static void isvce_compute_skip_cost(isvce_me_ctxt_t *ps_me_ctxt, ime_mv_t *ps_skip_mv, + mb_part_ctxt *ps_smb_part_info, UWORD32 u4_use_stat_sad, + WORD32 i4_reflist, WORD32 i4_is_slice_type_b) +{ /* Overview: rounds the skip mv to full-pel and returns without touching ps_smb_part_info when that mv is sub-pel or outside the search range. Otherwise computes the 16x16 luma SAD at that position, updates i4_min_sad / u4_min_sad_reached in ps_me_ctxt, and stores cost, distortion and the full-pel mv in ps_smb_part_info when the biased cost is <= the cost already stored there. */ + /* SAD(distortion metric) of an mb */ + WORD32 i4_mb_distortion; + + /* cost = distortion - u4_lambda_motion * skip bias (see the computation at the end) */ + WORD32 i4_mb_cost; + + /* pointer into the reference luma plane at the skip mv position */ + UWORD8 *pu1_ref = NULL; + + ime_mv_t s_skip_mv; /* full-pel copy of *ps_skip_mv */ + + s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx + 2) >> 2; /* divide by 4 with rounding; the low two bits are the sub-pel part tested below */ + s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy + 2) >> 2; + + /* Check if the skip mv is out of bounds or subpel */ + { + /* skip mv clipped to the search range */ + ime_mv_t s_clip_skip_mv; + + s_clip_skip_mv.i2_mvx = + CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx); + s_clip_skip_mv.i2_mvy = + CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy); + + if((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) || + (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) || (ps_skip_mv->i2_mvx & 0x3) || + (ps_skip_mv->i2_mvy & 0x3)) + { + return; /* skip candidate rejected; ps_smb_part_info and ps_me_ctxt are left untouched */ + } + } + + /* adjust ref pointer : base of the reference luma for this list, displaced by the full-pel skip mv */ + pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx + + (s_skip_mv.i2_mvy * ps_me_ctxt->ai4_rec_strd[i4_reflist]); + + if(u4_use_stat_sad == 1) + { + UWORD32 u4_is_nonzero; /* written by the SAD routine below; its exact meaning is defined by that routine (not visible here) */ + + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16( + ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd, + ps_me_ctxt->ai4_rec_strd[i4_reflist], ps_me_ctxt->pu2_sad_thrsh, &i4_mb_distortion, + &u4_is_nonzero); + + if(u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad) + { + ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */ + ps_me_ctxt->i4_min_sad =
(u4_is_nonzero == 0) ? 0 : i4_mb_distortion; /* min sad is forced to 0 when u4_is_nonzero is 0 */ + } + } + else + { + ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad]( + ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd, + ps_me_ctxt->ai4_rec_strd[i4_reflist], INT_MAX, &i4_mb_distortion); /* INT_MAX is presumably the early-exit threshold, which would disable early exit as the function header asks - TODO confirm against the ime SAD prototype */ + + if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad) + { + ps_me_ctxt->i4_min_sad = i4_mb_distortion; + ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */ + } + } + + /* for skip mode cost & distortion are identical + * But we shall add a bias to favor skip mode. + * Doc. JVT B118 Suggests SKIP_BIAS as 16. + * TODO : Empirical analysis of SKIP_BIAS is necessary */ + + i4_mb_cost = i4_mb_distortion - + (ps_me_ctxt->u4_lambda_motion * + (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b)); /* i4_skip_bias[0] always applies; i4_skip_bias[1] is scaled by i4_is_slice_type_b, so it contributes nothing when that flag is 0 */ + + if(i4_mb_cost <= ps_smb_part_info->i4_mb_cost) + { + ps_smb_part_info->i4_mb_cost = i4_mb_cost; + ps_smb_part_info->i4_mb_distortion = i4_mb_distortion; + ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx; /* stored in full-pel units */ + ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy; + } +} + +/** +******************************************************************************* +* +* @brief +* This function populates the length of the codewords for motion vectors in the +* range (-search range, search range) in pixels +* +* @param[in] ps_me +* Pointer to me ctxt +* +* @param[out] pu1_mv_bits +* length of the codeword for all mv's +* +* @remarks The length of the code words are derived from signed exponential +* goloumb codes.
+* +******************************************************************************* +*/ +void isvce_init_mv_bits(isvce_me_ctxt_t *ps_me_ctxt) +{ /* Overview: fills ps_me_ctxt->pu1_mv_bits with a codeword length per mv difference. Entries are written at both +i and -i, so pu1_mv_bits has to point at an element with valid storage on both sides; that setup is done by the owner of the buffer and is not visible here. */ + /* temp var */ + WORD32 i, codesize = 3, diff, limit; + UWORD32 u4_code_num, u4_range; + UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max; + + /* max srch range : the larger of the two default search ranges */ + diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y); + /* sub pel : scale by 4 */ + diff <<= 2; + /* delta mv : scale by 2, presumably because the difference of two mvs can span twice the range */ + diff <<= 1; + + /* codeNum for positive integer = 2x-1 : Table9-3 */ + u4_code_num = (diff << 1); + + /* get range of the bit string (GETRANGE is defined elsewhere; presumably u4_range receives the bit length of u4_code_num - confirm) */ + GETRANGE(u4_range, u4_code_num); + + limit = 2 * u4_range - 1; /* exclusive upper bound on the codeword lengths filled in below */ + + /* init mv bits : a zero mv difference is charged 1 bit */ + ps_me_ctxt->pu1_mv_bits[0] = 1; + + while(codesize < limit) /* codesize takes the odd values 3, 5, 7, ... */ + { + u4_uev_min = (1 << (codesize >> 1)); + u4_uev_max = 2 * u4_uev_min - 1; + + u4_sev_min = u4_uev_min >> 1; /* magnitudes covered by this length, e.g. codesize 3 -> 1, codesize 5 -> 2..3, codesize 7 -> 4..7 */ + u4_sev_max = u4_uev_max >> 1; + + DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize); + + for(i = u4_sev_min; i <= (WORD32) u4_sev_max; i++) + { + ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize; /* same length for both signs */ + } + + codesize += 2; + } +} + +/** +******************************************************************************* +* +* @brief Adds valid MVs as initial search candidates for motion estimation by +* cheking if it is distinct or not.
+* +* @param[in] ps_search_cand +* MV to add as search candidate +* +* @param[in] ps_me_ctxt +* pointer to ME context +* +* @param[in] u4_num_candidates +* Number of inital search candidates value +* +******************************************************************************* +*/ +static FORCEINLINE void isvce_add_me_init_search_cands(mv_t *ps_search_cand, + isvce_me_ctxt_t *ps_me_ctxt, + WORD32 i4_reflist, + UWORD32 *u4_num_candidates, + bool b_is_max_mv_diff_lt_4) +{ /* Overview: rounds *ps_search_cand, clips it to the search range and appends it to as_mv_init_search[i4_reflist] unless an identical entry is already stored or the applicable cap is reached. u4_num_candidates[0] is read as the current count and incremented on a successful append. */ + WORD32 k; + WORD32 i4_mv_x, i4_mv_y; + + bool b_is_mv_identical = false; + + WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n; + WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s; + WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e; + WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w; + UWORD32 u4_num_init_search_cands = u4_num_candidates[0]; /* count on entry; also the index of the next free slot */ + + i4_mv_x = (ps_search_cand->i2_mvx + 2) >> 2; /* divide by 4 with rounding (presumably quarter-pel to full-pel) */ + i4_mv_y = (ps_search_cand->i2_mvy + 2) >> 2; + + i4_mv_x = CLIP3(i4_srch_range_w, i4_srch_range_e, i4_mv_x); /* clamp to the search window */ + i4_mv_y = CLIP3(i4_srch_range_n, i4_srch_range_s, i4_mv_y); + + if(u4_num_init_search_cands == 0) + { + b_is_mv_identical = false; /* nothing stored yet, so nothing to compare against */ + } + else + { + for(k = u4_num_init_search_cands - 1; k >= 0; k--) /* scans every stored candidate; the loop does not stop at the first match */ + { + if((ps_me_ctxt->as_mv_init_search[i4_reflist][k].i2_mvx == i4_mv_x && + ps_me_ctxt->as_mv_init_search[i4_reflist][k].i2_mvy == i4_mv_y)) + { + b_is_mv_identical = true; + } + } + } + + if(!b_is_mv_identical) + { + if(USE_ILP_MV_IN_ME && ps_me_ctxt->ps_ilp_me_cands) /* ILP candidates present: the list length is capped */ + { + if(ps_me_ctxt->ps_ilp_me_cands->u4_num_ilp_mvs < 2 || b_is_max_mv_diff_lt_4) + { + if(u4_num_init_search_cands < MAX_CAND_IF_NUM_ILP_MV_LT_2) /* candidate is silently dropped once the cap is reached */ + { + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx = + i4_mv_x; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy = + i4_mv_y; + + u4_num_candidates[0] += 1; + } + } + else if(ps_me_ctxt->ps_ilp_me_cands->u4_num_ilp_mvs >= 2 && !b_is_max_mv_diff_lt_4) /* exact negation of the if above, so always true when reached */ + { + if(u4_num_init_search_cands < MAX_CAND_IF_NUM_ILP_MV_GTEQ_2) + {
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx = + i4_mv_x; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy = + i4_mv_y; + + u4_num_candidates[0] += 1; + } + } + } + else /* no ILP candidates: no cap is applied here; isvce_get_search_candidates asserts the final count against MAX_FPEL_SEARCH_CANDIDATES */ + { + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx = i4_mv_x; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy = i4_mv_y; + + u4_num_candidates[0] += 1; + } + } +} + +/** +******************************************************************************* +* +* @brief Determines the valid candidates for which the initial search shall +*happen. The best of these candidates is used to center the diamond pixel +*search. +* +* @par Description: The function sends the skip, (0,0), left, top and top-right +* neighbouring MBs MVs. The left, top and top-right MBs MVs are used because +* these are the same MVs that are used to form the MV predictor. This initial MV +* search candidates need not take care of slice boundaries and hence neighbor +* availability checks are not made here. +* +* @param[in] ps_left_mb_pu +* pointer to left mb motion vector info +* +* @param[in] ps_top_mb_pu +* pointer to top & top right mb motion vector info +* +* @param[in] ps_top_left_mb_pu +* pointer to top left mb motion vector info +* +* @param[out] ps_skip_mv +* pointer to skip motion vectors for the curr mb +* +* @param[in] i4_mb_x +* mb index x +* +* @param[in] i4_mb_y +* mb index y +* +* @param[in] i4_wd_mbs +* pic width in mbs +* +* @param[in] ps_motionEst +* pointer to me context +* +* @returns The list of MVs to be used of priming the full pel search and the +* number of such MVs +* +* @remarks +* Assumptions : 1.
Assumes Only partition of size 16x16 +* +******************************************************************************* +*/ +static void isvce_get_search_candidates(isvce_process_ctxt_t *ps_proc, isvce_me_ctxt_t *ps_me_ctxt, + WORD32 i4_reflist) +{ /* Overview: builds as_mv_init_search[i4_reflist] through isvce_add_me_init_search_cands in this order: ILP mvs (when there are fewer than 2 ILP mvs or their max diff is < 4), top, left, skip (plus the temporal skip mv for B slices), ILP mvs (the remaining case), top right or else top left, and finally the zero mv. Also stores the predicted mv in as_mb_part[i4_reflist].s_mv_pred, the skip type in i4_skip_type and the final count in u4_num_candidates[i4_reflist]. */ + mv_t s_zero_mv; + mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv; + + UWORD32 i; + WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist]; + ilp_me_cands_t *ps_ilp_me_cands = ps_me_ctxt->ps_ilp_me_cands; + + bool b_is_max_mv_diff_lt_4 = false; + WORD32 i4_mb_x = ps_proc->i4_mb_x; + WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? L1 : L0; /* the complementary list: neighbours whose u1_pred_mode equals it are not used as candidates */ + UWORD32 u4_num_candidates = 0; + + s_zero_mv.i2_mvx = 0; + s_zero_mv.i2_mvy = 0; + ps_left_mv = &ps_proc->s_nbr_info.ps_left_mb_info->as_pu->as_me_info[i4_reflist].s_mv; + ps_top_mv = + &(ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x)->as_pu->as_me_info[i4_reflist].s_mv; + ps_top_left_mv = &(ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x - 1)->as_pu->as_me_info[i4_reflist].s_mv; /* top left mb is entry (i4_mb_x - 1) of the top row, the same entry that i4_top_left_mode below and isvce_mv_pred_me read; it is only dereferenced when u1_mb_d is set */ + ps_top_right_mv = + &(ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x + 1)->as_pu->as_me_info[i4_reflist].s_mv; + + /* each i4_*_mode is 1 when that neighbour is available and its pred mode is not the complementary list, else 0 */ i4_left_mode = + ps_ngbr_avbl->u1_mb_a + ? (ps_proc->s_nbr_info.ps_left_mb_info->as_pu->u1_pred_mode != i4_cmpl_predmode) + : 0; + i4_top_mode = ps_ngbr_avbl->u1_mb_b + ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x)->as_pu->u1_pred_mode != + i4_cmpl_predmode) + : 0; + i4_top_right_mode = + ps_ngbr_avbl->u1_mb_c + ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x + 1)->as_pu->u1_pred_mode != + i4_cmpl_predmode) + : 0; + i4_top_left_mode = + ps_ngbr_avbl->u1_mb_d + ?
((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x - 1)->as_pu->u1_pred_mode != + i4_cmpl_predmode) + : 0; + + if(USE_ILP_MV_IN_ME && ps_ilp_me_cands) + { + if(ps_ilp_me_cands->u4_num_ilp_mvs >= 2) + { + b_is_max_mv_diff_lt_4 = isvce_check_max_mv_diff_lt_4(ps_ilp_me_cands, i4_reflist); + } + + /* Taking ILP MV Predictor as one of the candidates : added first in this case, ahead of the spatial neighbours */ + if(ps_ilp_me_cands->u4_num_ilp_mvs < 2 || b_is_max_mv_diff_lt_4) + { + for(i = 0; i < ps_ilp_me_cands->u4_num_ilp_mvs_incl_nbrs; i++) + { + if(((ps_ilp_me_cands->ae_pred_mode[i] == ((PRED_MODE_T) i4_reflist)) || + ((ps_ilp_me_cands->ae_pred_mode[i] == BI)))) /* only ILP mvs that use this list, alone or as part of BI */ + { + isvce_add_me_init_search_cands(&ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv, + ps_me_ctxt, i4_reflist, &u4_num_candidates, + b_is_max_mv_diff_lt_4); + } + } + } + } + + /* Taking the Top MV Predictor as one of the candidates */ + if(ps_ngbr_avbl->u1_mb_b && i4_top_mode) + { + isvce_add_me_init_search_cands(ps_top_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates, + b_is_max_mv_diff_lt_4); + } + + /* Taking the Left MV Predictor as one of the candidates */ + if(ps_ngbr_avbl->u1_mb_a && i4_left_mode) + { + isvce_add_me_init_search_cands(ps_left_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates, + b_is_max_mv_diff_lt_4); + } + + /********************************************************************/ + /* MV Prediction */ + /********************************************************************/ + isvce_mv_pred_me(ps_proc, i4_reflist); /* writes ps_proc->ps_pred_mv[i4_reflist], copied below */ + + ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx; + ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy; + + /* Get the skip motion vector */ + { + ps_me_ctxt->i4_skip_type = + ps_codec->apf_find_skip_params_me[ps_proc->i4_slice_type](ps_proc, i4_reflist); /* callback chosen by slice type; it is expected to fill ps_proc->ps_skip_mv, which is read right below */ + + /* Taking the Skip motion vector as one of the candidates */ + isvce_add_me_init_search_cands(&ps_proc->ps_skip_mv[i4_reflist].s_mv, ps_me_ctxt, + i4_reflist, &u4_num_candidates, b_is_max_mv_diff_lt_4); + + if(ps_proc->i4_slice_type
== BSLICE) + { + /* Taking the temporal Skip motion vector as one of the candidates */ + isvce_add_me_init_search_cands(&ps_proc->ps_skip_mv[i4_reflist + 2].s_mv, ps_me_ctxt, + i4_reflist, &u4_num_candidates, b_is_max_mv_diff_lt_4); + } + } + + /* Taking ILP MV Predictor as one of the candidates : the case not handled at the top, i.e. 2 or more ILP mvs whose max diff is not < 4 */ + if(USE_ILP_MV_IN_ME && ps_ilp_me_cands && + (ps_ilp_me_cands->u4_num_ilp_mvs >= 2 && !b_is_max_mv_diff_lt_4)) + { + for(i = 0; i < ps_ilp_me_cands->u4_num_ilp_mvs_incl_nbrs; i++) + { + if(((ps_ilp_me_cands->ae_pred_mode[i] == ((PRED_MODE_T) i4_reflist)) || + ((ps_ilp_me_cands->ae_pred_mode[i] == BI)))) + { + isvce_add_me_init_search_cands(&ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv, + ps_me_ctxt, i4_reflist, &u4_num_candidates, + b_is_max_mv_diff_lt_4); + } + } + } + + if(ps_ngbr_avbl->u1_mb_b && i4_top_mode) /* top right / top left are considered only when the top mb itself was usable */ + { + /* Taking the TopRt MV Predictor as one of the candidates */ + if(ps_ngbr_avbl->u1_mb_c && i4_top_right_mode) + { + isvce_add_me_init_search_cands(ps_top_right_mv, ps_me_ctxt, i4_reflist, + &u4_num_candidates, b_is_max_mv_diff_lt_4); + } + + /* Taking the TopLt MV Predictor as one of the candidates : only when top right was not taken */ + else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode) + { + isvce_add_me_init_search_cands(ps_top_left_mv, ps_me_ctxt, i4_reflist, + &u4_num_candidates, b_is_max_mv_diff_lt_4); + } + } + + /* Taking the Zero motion vector as one of the candidates */ + isvce_add_me_init_search_cands(&s_zero_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates, + b_is_max_mv_diff_lt_4); + + ASSERT(u4_num_candidates <= MAX_FPEL_SEARCH_CANDIDATES); /* checked only after all writes have happened */ + + ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates; +} + +/** +******************************************************************************* +* +* @brief The function computes parameters for a PSKIP MB +* +* @par Description: +* The function updates the skip motion vector and checks if the current +* MB can be a skip PSKIP mB or not +* +* @param[in] ps_proc +* Pointer to process context +* +* @param[in] u4_for_me +* Flag to
indicate function is called for ME or not +* +* @param[out] i4_ref_list +* Current active refernce list +* +* @returns Flag indicating if the current MB can be marked as skip +* +* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264 +* specification. +* +******************************************************************************* +*/ +WORD32 isvce_find_pskip_params(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist) +{ /* Overview: writes the P-skip mv into ps_proc->ps_skip_mv[L0] and returns 1 when the L0 mv of the current mb (ps_proc->ps_mb_info) equals it, else 0. The skip mv is zero when the left or top mb is unavailable, or when either of them has L0 ref idx 0 together with a zero L0 mv; otherwise it is the L0 predicted mv from ps_proc->ps_pred_mv. i4_reflist is unused. */ + /* left mb motion vector */ + isvce_enc_pu_t *ps_left_mb_pu; + + /* top mb motion vector */ + isvce_enc_pu_t *ps_top_mb_pu; + + /* Skip mv : always the L0 slot, regardless of i4_reflist */ + mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[L0].s_mv; + + UNUSED(i4_reflist); + + ps_left_mb_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu; + ps_top_mb_pu = (ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x)->as_pu; + + /* the neighbour pointers are formed unconditionally; the availability flags are tested first in the condition, so unavailable neighbours are not read */ if((!ps_proc->ps_ngbr_avbl->u1_mb_a) || (!ps_proc->ps_ngbr_avbl->u1_mb_b) || + ((ps_left_mb_pu->as_me_info[L0].i1_ref_idx == 0) && + (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) && + (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)) || + ((ps_top_mb_pu->as_me_info[L0].i1_ref_idx == 0) && + (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) && + (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0))) + + { + ps_skip_mv->i2_mvx = 0; + ps_skip_mv->i2_mvy = 0; + } + else + { + ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[L0].s_mv.i2_mvx; /* relies on ps_pred_mv[L0] having been computed before this call */ + ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[L0].s_mv.i2_mvy; + } + + if((ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx) && + (ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy)) + { + return 1; /* current mv matches the skip mv */ + } + + return 0; +} + +/** +******************************************************************************* +* +* @brief The function computes parameters for a PSKIP MB +* +* @par Description: +* The function updates the skip motion vector and checks if the current +* MB can be a skip PSKIP mB or not +* +* @param[in] ps_proc +* Pointer to process context +* +* @param[in] u4_for_me +* Flag to
dincate fucntion is called for ME or not +* +* @param[out] i4_ref_list +* Current active refernce list +* +* @returns Flag indicating if the current MB can be marked as skip +* +* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264 +* specification. +* +******************************************************************************* +*/ +WORD32 isvce_find_pskip_params_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist) +{ /* Overview: derives the P-skip mv exactly as isvce_find_pskip_params does and writes it to ps_proc->ps_skip_mv[L0], but does not compare it with the current mb and always returns L0. isvce_get_search_candidates stores the result of its apf_find_skip_params_me callback in i4_skip_type; presumably this function is one of those callbacks - confirm where the table is filled. i4_reflist is unused. */ + /* left mb motion vector */ + isvce_enc_pu_t *ps_left_mb_pu; + + /* top mb motion vector */ + isvce_enc_pu_t *ps_top_mb_pu; + + /* Skip mv : always the L0 slot, regardless of i4_reflist */ + mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[L0].s_mv; + + UNUSED(i4_reflist); + + ps_left_mb_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu; + ps_top_mb_pu = (ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x)->as_pu; + + /* zero skip mv when the left or top mb is unavailable, or when either has L0 ref idx 0 with a zero L0 mv */ if((!ps_proc->ps_ngbr_avbl->u1_mb_a) || (!ps_proc->ps_ngbr_avbl->u1_mb_b) || + ((ps_left_mb_pu->as_me_info[L0].i1_ref_idx == 0) && + (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) && + (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)) || + ((ps_top_mb_pu->as_me_info[L0].i1_ref_idx == 0) && + (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) && + (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0))) + + { + ps_skip_mv->i2_mvx = 0; + ps_skip_mv->i2_mvy = 0; + } + else + { + ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[L0].s_mv.i2_mvx; /* relies on ps_pred_mv[L0] having been computed before this call */ + ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[L0].s_mv.i2_mvy; + } + + return L0; /* a list identifier, not a can-skip flag */ +} + +/** +******************************************************************************* +* +* @brief motion vector predictor +* +* @par Description: +* The routine calculates the motion vector predictor for a given block, +* given the candidate MV predictors. +* +* @param[in] ps_left_mb_pu +* pointer to left mb motion vector info +* +* @param[in] ps_top_row_pu +* pointer to top & top right mb motion vector info +* +* @param[out] ps_pred_mv +* pointer to candidate predictors for the current block +* +* @returns The x & y components of the MV predictor.
+* +* @remarks The code implements the logic as described in sec 8.4.1.3 in H264 +* specification. +* Assumptions : 1. Assumes Single reference frame +* 2. Assumes Only partition of size 16x16 +* +******************************************************************************* +*/ +void isvce_get_mv_predictor(isvce_enc_pu_mv_t *ps_pred_mv, isvce_enc_pu_mv_t *ps_neig_mv, + WORD32 pred_algo) +{ /* Overview: ps_neig_mv is read as an array of three entries (0 = left, 1 = top, 2 = top right). pred_algo 0, 1 or 2 copies that entry; 3 takes the component-wise MEDIAN of the three; any other value leaves *ps_pred_mv unchanged. Only s_mv is written, i1_ref_idx is not. */ + switch(pred_algo) + { + case 0: + /* left */ + ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[0].s_mv.i2_mvx; + ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[0].s_mv.i2_mvy; + break; + case 1: + /* top */ + ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[1].s_mv.i2_mvx; + ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[1].s_mv.i2_mvy; + break; + case 2: + /* top right */ + ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[2].s_mv.i2_mvx; + ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[2].s_mv.i2_mvy; + break; + case 3: + /* median : x and y are handled independently; MEDIAN is a project macro whose last argument receives the result */ + MEDIAN(ps_neig_mv[0].s_mv.i2_mvx, ps_neig_mv[1].s_mv.i2_mvx, ps_neig_mv[2].s_mv.i2_mvx, + ps_pred_mv->s_mv.i2_mvx); + MEDIAN(ps_neig_mv[0].s_mv.i2_mvy, ps_neig_mv[1].s_mv.i2_mvy, ps_neig_mv[2].s_mv.i2_mvy, + ps_pred_mv->s_mv.i2_mvy); + + break; + default: /* unknown pred_algo : output left untouched */ + break; + } +} + +/** +******************************************************************************* +* +* @brief This function performs MV prediction +* +* @par Description: +* For each reference list in use (one for PSLICE, two otherwise) the mvs of the +* left, top and top right (or, failing that, top left) neighbours are gathered +* and the resulting predictor is written to ps_proc->ps_pred_mv[list], with its +* i1_ref_idx set to 0 +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @param[in] i4_slice_type +* Slice type; PSLICE restricts the prediction to a single reference list +* +* @returns none +* +* @remarks +* Reads ps_proc->ps_ngbr_avbl and the u1_is_intra flags of the neighbours, so +* the intra inter decision should be done before the call +* +******************************************************************************* +*/ +void isvce_mv_pred(isvce_process_ctxt_t *ps_proc, WORD32 i4_slice_type) +{ + isvce_enc_pu_mv_t as_pu_mv[3]; /* candidate set: [0] left, [1] top, [2] top right or top left */ + + UWORD8 u1_reflist, u1_cmpl_predmode; + WORD32 i; + + isvce_enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv; + isvce_enc_pu_mv_t s_default_mv_info = {{0, 0}, -1}; /* zero mv with ref idx -1, used for neighbours that cannot be used */ + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + isvce_mb_info_t *ps_top_mb =
ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; + isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1; /* formed unconditionally; dereferenced only when u1_mb_d is set */ + isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1; /* formed unconditionally; dereferenced only when u1_mb_c is set */ + isvce_mb_info_t *ps_left_mb = ps_proc->s_nbr_info.ps_left_mb_info; + + UWORD8 u1_left_is_intra = ps_left_mb->u1_is_intra; /* NOTE(review): read before u1_mb_a is checked - confirm ps_left_mb_info is always valid */ + UWORD8 u1_num_ref_lists = (i4_slice_type == PSLICE) ? 1 : 2; + + for(u1_reflist = 0; u1_reflist < u1_num_ref_lists; u1_reflist++) + { + WORD8 i1_cur_ref_idx = 0; /* single reference frame per list is assumed */ + + WORD32 pred_algo = 3, a, b, c; /* 3 = median, the default */ + + for(i = 0; i < 3; i++) + { + as_pu_mv[i] = s_default_mv_info; + } + + u1_cmpl_predmode = (u1_reflist == 0) ? L1 : L0; /* neighbours whose pred mode equals the other list are not used for this list */ + + /* Before performing mv prediction prepare the ngbr information and + * reset motion vectors basing on their availability */ + if(ps_ngbr_avbl->u1_mb_a && (u1_left_is_intra != 1) && + (ps_left_mb->as_pu->u1_pred_mode != u1_cmpl_predmode)) + { + /* left mv */ + as_pu_mv[0].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv; + as_pu_mv[0].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx; + + /* Only left available : replicate it into the other two slots so the median returns the left mv */ + if(!ps_ngbr_avbl->u1_mb_b && !ps_ngbr_avbl->u1_mb_c && !ps_ngbr_avbl->u1_mb_d) + { + as_pu_mv[1].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv; + as_pu_mv[1].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx; + + as_pu_mv[2].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv; + as_pu_mv[2].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx; + } + } + if(ps_ngbr_avbl->u1_mb_b && !ps_top_mb->u1_is_intra && + (ps_top_mb->as_pu[0].u1_pred_mode != u1_cmpl_predmode)) + { + /* top mv */ + as_pu_mv[1].s_mv = ps_top_mb->as_pu[0].as_me_info[u1_reflist].s_mv; + as_pu_mv[1].i1_ref_idx = ps_top_mb->as_pu[0].as_me_info[u1_reflist].i1_ref_idx; + } + + if(!ps_ngbr_avbl->u1_mb_c) + { + /* top right mv - When top right partition is not available for + * prediction if top left is available use it for prediction else + * set the mv information to -1 and (0, 0) + * */ + if(ps_ngbr_avbl->u1_mb_d &&
!ps_top_left_mb->u1_is_intra && + (ps_top_left_mb->as_pu->u1_pred_mode != u1_cmpl_predmode)) + { + as_pu_mv[2].s_mv = ps_top_left_mb->as_pu[0].as_me_info[u1_reflist].s_mv; + as_pu_mv[2].i1_ref_idx = ps_top_left_mb->as_pu[0].as_me_info[u1_reflist].i1_ref_idx; + } + } + else if(ps_top_right_mb->as_pu->u1_pred_mode != u1_cmpl_predmode && + !ps_top_right_mb->u1_is_intra) /* reached only when u1_mb_c is set */ + { + as_pu_mv[2].s_mv = ps_top_right_mb->as_pu->as_me_info[u1_reflist].s_mv; + as_pu_mv[2].i1_ref_idx = ps_top_right_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx; + } + + /* If only one of the candidate blocks has a reference frame equal to + * the current block then use the same block as the final predictor */ + a = (as_pu_mv[0].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1; /* 0 = ref idx matches, -1 = it does not */ + b = (as_pu_mv[1].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1; + c = (as_pu_mv[2].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1; + if(a == 0 && b == -1 && c == -1) + pred_algo = 0; /* LEFT */ + else if(a == -1 && b == 0 && c == -1) + pred_algo = 1; /* TOP */ + else if(a == -1 && b == -1 && c == 0) + pred_algo = 2; /* third candidate : top right or its top left substitute */ + + isvce_get_mv_predictor(&ps_pred_mv[u1_reflist], &as_pu_mv[0], pred_algo); + + ps_pred_mv[u1_reflist].i1_ref_idx = i1_cur_ref_idx; + } +} + +/** +******************************************************************************* +* +* @brief This function approximates Pred. MV +* +* @par Description: +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns none +* +* @remarks none +* Motion estimation happens at nmb level.
For cost calculations, mv is appro +* ximated using this function +* +******************************************************************************* +*/ +void isvce_mv_pred_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_ref_list) +{ /* Overview: single-list variant of isvce_mv_pred. Gathers the left, top and top right (or top left) neighbour mvs for i4_ref_list and writes the predictor mv to ps_proc->ps_pred_mv[i4_ref_list]. Unlike isvce_mv_pred, the i1_ref_idx of the output entry is not written here. */ + isvce_enc_pu_mv_t as_pu_mv[3]; /* candidate set: [0] left, [1] top, [2] top right or top left */ + + WORD32 i, a, b, c; + + isvce_enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv; + isvce_enc_pu_mv_t s_default_mv_info = {{0, 0}, -1}; /* zero mv with ref idx -1, used for neighbours that cannot be used */ + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; + isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1; /* formed unconditionally; dereferenced only when u1_mb_d is set */ + isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1; /* formed unconditionally; dereferenced only when u1_mb_c is set */ + isvce_mb_info_t *ps_left_mb = ps_proc->s_nbr_info.ps_left_mb_info; + + WORD8 i1_cur_ref_idx = 0; /* single reference frame per list is assumed */ + WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? L1 : L0; /* neighbours whose pred mode equals the other list are not used */ + WORD32 pred_algo = 3; /* 3 = median, the default */ + + for(i = 0; i < 3; i++) + { + as_pu_mv[i] = s_default_mv_info; + } + + if(ps_ngbr_avbl->u1_mb_a && !ps_left_mb->u1_is_intra && + (ps_left_mb->as_pu->u1_pred_mode != i4_cmpl_predmode)) + { + /* left mv */ + as_pu_mv[0].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv; + as_pu_mv[0].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx; + + /* Only left available : replicate it into the other two slots so the median returns the left mv */ + if(!ps_ngbr_avbl->u1_mb_b && !ps_ngbr_avbl->u1_mb_c && !ps_ngbr_avbl->u1_mb_d) + { + as_pu_mv[1].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv; + as_pu_mv[1].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx; + + as_pu_mv[2].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv; + as_pu_mv[2].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx; + } + } + if(ps_ngbr_avbl->u1_mb_b && !ps_top_mb->u1_is_intra && + (ps_top_mb->as_pu->u1_pred_mode != i4_cmpl_predmode)) + { + /* top mv */ + as_pu_mv[1].s_mv = ps_top_mb->as_pu->as_me_info[i4_ref_list].s_mv; + as_pu_mv[1].i1_ref_idx = ps_top_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx; + } + if(!ps_ngbr_avbl->u1_mb_c) + { + /* top right mv - When top right partition is not available
for + * prediction if top left is available use it for prediction else + * set the mv information to -1 and (0, 0) + * */ + if(ps_ngbr_avbl->u1_mb_d && !ps_top_left_mb->u1_is_intra && + (ps_top_left_mb->as_pu->u1_pred_mode != i4_cmpl_predmode)) + { + as_pu_mv[2].s_mv = ps_top_left_mb->as_pu->as_me_info[i4_ref_list].s_mv; + as_pu_mv[2].i1_ref_idx = ps_top_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx; + } + } + else if(ps_top_right_mb->as_pu->u1_pred_mode != i4_cmpl_predmode && + !ps_top_right_mb->u1_is_intra) /* reached only when u1_mb_c is set */ + { + as_pu_mv[2].s_mv = ps_top_right_mb->as_pu->as_me_info[i4_ref_list].s_mv; + as_pu_mv[2].i1_ref_idx = ps_top_right_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx; + } + + /* If only one of the candidate blocks has a reference frame equal to + * the current block then use the same block as the final predictor */ + a = (as_pu_mv[0].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1; /* 0 = ref idx matches, -1 = it does not */ + b = (as_pu_mv[1].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1; + c = (as_pu_mv[2].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1; + + if(a == 0 && b == -1 && c == -1) + pred_algo = 0; /* LEFT */ + else if(a == -1 && b == 0 && c == -1) + pred_algo = 1; /* TOP */ + else if(a == -1 && b == -1 && c == 0) + pred_algo = 2; /* third candidate : top right or its top left substitute */ + + isvce_get_mv_predictor(&ps_pred_mv[i4_ref_list], &as_pu_mv[0], pred_algo); /* writes s_mv only */ +} + +/** +******************************************************************************* +* +* @brief This function initializes me ctxt +* +* @par Description: +* Before dispatching the current job to me thread, the me context associated +* with the job is initialized.
+* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_init_me(isvce_process_ctxt_t *ps_proc) +{ /* Overview: fills the me context embedded in ps_proc (s_me_ctxt) with the skip biases, the luma source pointer and stride, the luma reference pointers and strides for both lists, and the motion lambda. */ + isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt; + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B; /* NOTE(review): indexed by slice-type constants here but read as [0] and [1] in isvce_compute_skip_cost - relies on the values of PSLICE and BSLICE, confirm */ + + if(ps_codec->s_cfg.u4_num_bframes == 0) + { + ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P; /* P skip bias is four times larger when no B frames are configured */ + } + else + { + ps_me_ctxt->i4_skip_bias[PSLICE] = SKIP_BIAS_P; + } + + /* component buffer 0 is used for luma */ ps_me_ctxt->pu1_src_buf_luma = ps_proc->s_src_buf_props.as_component_bufs[0].pv_data; + ps_me_ctxt->i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride; + + ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data; /* both reference entries are copied unconditionally, whatever the slice type */ + ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data; + + ps_me_ctxt->ai4_rec_strd[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].i4_data_stride; + ps_me_ctxt->ai4_rec_strd[1] = ps_proc->as_ref_buf_props[1].as_component_bufs[0].i4_data_stride; + + ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp]; /* table lookup by mb qp; u1_mb_qp has to be set in the me context before this call */ +} + +/** +******************************************************************************* +* +* @brief This function performs motion estimation for the current mb using +* single reference list +* +* @par Description: +* The current mb is compared with a list of mb's in the reference frame for +* least cost. The mb that offers least cost is chosen as predicted mb and the +* displacement of the predicted mb from index location of the current mb is +* signaled as mv. The list of the mb's that are chosen in the reference frame +* are dependent on the speed of the ME configured. +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns motion vector of the pred mb, sad, cost.
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_compute_me_single_reflist(isvce_process_ctxt_t *ps_proc)
+{
+    /* Cost, distortion and mv of the skip candidate */
+    mb_part_ctxt s_skip_mbpart;
+
+    /* Best 16x16 candidate of list L0 */
+    mb_part_ctxt *ps_l0_part;
+
+    /* PU of the current mb that receives the result */
+    isvce_enc_pu_t *ps_pu;
+
+    isvce_codec_t *ps_codec = ps_proc->ps_codec;
+    isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_codec->s_isa_dependent_fxns.s_inter_pred_fxns;
+
+    ps_l0_part = &ps_me_ctxt->as_mb_part[L0];
+
+    /* SAD thresholds come from the first set of quantization parameters */
+    ps_me_ctxt->pu2_sad_thrsh = ps_proc->ps_qp_params[0]->pu2_sad_thrsh;
+
+    ASSERT(1 == MAX_REF_FRAMES_PER_PRED_DIR);
+
+    {
+        /* During evaluation for motion vectors do not search through padded
+         * regions. These are the number of rows and columns that are effective
+         * for me evaluation on each side of the current mb */
+        WORD32 i4_dist_n = (ps_proc->i4_mb_y + 1) * MB_SIZE;
+        WORD32 i4_dist_s = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
+        WORD32 i4_dist_w = (ps_proc->i4_mb_x + 1) * MB_SIZE;
+        WORD32 i4_dist_e = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
+
+        /* NOTE : For now, the search range is limited to
+         * DEFAULT_MAX_SRCH_RANGE_X / 2 (resp. _Y / 2) on all sides. Each side
+         * is then pulled in by one pel to facilitate fast sub pel computation
+         * with minimal loads */
+        ps_me_ctxt->i4_srch_range_w = 1 - MIN(i4_dist_w, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+        ps_me_ctxt->i4_srch_range_e = MIN(i4_dist_e, DEFAULT_MAX_SRCH_RANGE_X >> 1) - 1;
+        ps_me_ctxt->i4_srch_range_n = 1 - MIN(i4_dist_n, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+        ps_me_ctxt->i4_srch_range_s = MIN(i4_dist_s, DEFAULT_MAX_SRCH_RANGE_Y >> 1) - 1;
+    }
+
+    /***********************************************************************
+     * Compute ME for list L0
+     ***********************************************************************/
+
+    /* Reset the early exit state for the current list */
+    ps_me_ctxt->u4_min_sad_reached = 0;
+    ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
+
+    /* Get the seed motion vector candidates */
+    isvce_get_search_candidates(ps_proc, ps_me_ctxt, L0);
+
+    /******************************************************************
+     * Evaluate the SKIP candidate for the current list
+     *****************************************************************/
+    s_skip_mbpart.i4_mb_cost = INT_MAX;
+    s_skip_mbpart.i4_mb_distortion = INT_MAX;
+    s_skip_mbpart.s_mv_curr.i2_mvx = 0;
+    s_skip_mbpart.s_mv_curr.i2_mvy = 0;
+
+    isvce_compute_skip_cost(ps_me_ctxt, (ime_mv_t *) (&ps_proc->ps_skip_mv[L0].s_mv),
+                            &s_skip_mbpart, ps_codec->s_cfg.u4_enable_satqd, PRED_L0,
+                            0 /* Not a Bslice */);
+
+    /* Scale the skip MV to qpel resolution */
+    s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
+    s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;
+
+    /******************************************************************
+     * Evaluate ME for the current list
+     *****************************************************************/
+    ps_l0_part->i4_mb_cost = INT_MAX;
+    ps_l0_part->i4_mb_distortion = INT_MAX;
+    ps_l0_part->s_mv_curr.i2_mvx = 0;
+    ps_l0_part->s_mv_curr.i2_mvy = 0;
+
+    /* No half pel plane has been selected yet */
+    ps_l0_part->pu1_best_hpel_buf = NULL;
+
+    /* The search is skipped when the minimum SAD was already reached */
+    if(0 == ps_me_ctxt->u4_min_sad_reached)
+    {
+        /* Evaluate search candidates for initial mv pt */
+        isvce_evaluate_init_srchposn_16x16(ps_me_ctxt, L0);
+
+        /* Full pel motion estimation */
+        isvce_full_pel_motion_estimation_16x16(ps_me_ctxt, L0);
+
+        /* Scale the MV to qpel resolution */
+        ps_l0_part->s_mv_curr.i2_mvx <<= 2;
+        ps_l0_part->s_mv_curr.i2_mvy <<= 2;
+
+        if(ps_me_ctxt->u4_enable_hpel)
+        {
+            WORD32 i4_rec_strd = ps_me_ctxt->ai4_rec_strd[L0];
+
+            /* Offset of the converged full pel position in the reference */
+            WORD32 i4_fpel_offset = (ps_l0_part->s_mv_curr.i2_mvx >> 2) +
+                                    (ps_l0_part->s_mv_curr.i2_mvy >> 2) * i4_rec_strd;
+
+            /* Source for the half pel generation functions. Half pel search is
+             * done on both sides of the full pel, hence half_x of width x
+             * height = 17x16 starts one pel to the left of the converged
+             * position */
+            UWORD8 *pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[L0] + i4_fpel_offset - 1;
+
+            ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
+
+            ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
+            ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
+            ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
+
+            /* computing half_x */
+            ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[0],
+                                                  i4_rec_strd, ps_me_ctxt->u4_subpel_buf_strd);
+
+            /*
+             * half_y of width x height = 16x17 starts from the top half_y of
+             * the converged full pel and half_xy needs the top left position,
+             * hence the source moves to
+             * full_pel_converged_point - i4_rec_strd - 1
+             */
+            pu1_hpel_src -= i4_rec_strd;
+
+            /* computing half_y and half_xy */
+            ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
+                pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1], ps_me_ctxt->apu1_subpel_buffs[2],
+                i4_rec_strd, ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
+                ps_me_ctxt->u4_subpel_buf_strd);
+
+            isvce_sub_pel_motion_estimation_16x16(ps_me_ctxt, L0);
+        }
+    }
+
+    /***********************************************************************
+     * If the skip MV gives a better cost, copy it to the L0 mb part.
+     * Otherwise, when a half pel plane won, copy that plane to the best
+     * sub pel buffer of the current mb.
+     * In B slices this loop should go only to PREDL1: If we found min sad
+     * we will go to the skip ref list only
+     * Have to find a way to make it without too much change or new vars
+     **********************************************************************/
+    if(s_skip_mbpart.i4_mb_cost < ps_l0_part->i4_mb_cost)
+    {
+        ps_l0_part->s_mv_curr = s_skip_mbpart.s_mv_curr;
+        ps_l0_part->i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
+        ps_l0_part->i4_mb_cost = s_skip_mbpart.i4_mb_cost;
+    }
+    else if(NULL != ps_l0_part->pu1_best_hpel_buf)
+    {
+        ps_inter_pred_fxns->pf_inter_pred_luma_copy(
+            ps_l0_part->pu1_best_hpel_buf, ps_proc->pu1_best_subpel_buf,
+            ps_me_ctxt->u4_subpel_buf_strd, ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL,
+            0);
+    }
+
+    /**********************************************************************
+     * Publish the L0 result as a single 16x16 partition
+     **********************************************************************/
+    ps_pu = ps_proc->ps_mb_info->as_pu;
+
+    ps_pu->as_me_info[L0].s_mv.i2_mvx = ps_l0_part->s_mv_curr.i2_mvx;
+    ps_pu->as_me_info[L0].s_mv.i2_mvy = ps_l0_part->s_mv_curr.i2_mvy;
+
+    ps_proc->ps_cur_mb->i4_mb_cost = ps_l0_part->i4_mb_cost;
+    ps_proc->ps_cur_mb->i4_mb_distortion = ps_l0_part->i4_mb_distortion;
+    ps_proc->ps_cur_mb->u4_mb_type = P16x16;
+
+    ps_pu->u1_pred_mode = L0;
+
+    /* Mark the reflists : only list 0 carries a reference */
+    ps_pu->as_me_info[0].i1_ref_idx = 0;
+    ps_pu->as_me_info[1].i1_ref_idx = -1;
+
+    /* number of partitions */
+    ps_proc->u4_num_sub_partitions = 1;
+    *(ps_proc->pu4_mb_pu_cnt) = 1;
+
+    /* position in-terms of PU */
+    ps_pu->u1_pos_x_in_4x4 = 0;
+    ps_pu->u1_pos_y_in_4x4 = 0;
+
+    /* PU size : 4 x 4 blocks of 4x4, stored minus one */
+    ps_pu->u1_wd_in_4x4_m1 = 3;
+    ps_pu->u1_ht_in_4x4_m1 = 3;
+
+    /* Update min sad conditions */
+    if(1 == ps_me_ctxt->u4_min_sad_reached)
+    {
+        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
+        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current NMB
+*
+* @par Description:
+* Intializes input and output pointers required by the function
+*isvce_compute_me and calls the function isvce_compute_me in a loop to process
+*NMBs.
+* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_compute_me_nmb(isvce_process_ctxt_t *ps_proc, UWORD32 u4_nmb_count) +{ + UWORD32 u4_i; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + isvce_mb_info_t *ps_mb_begin = ps_proc->ps_mb_info; + + UWORD32 *pu4_mb_pu_cnt_begin = ps_proc->pu4_mb_pu_cnt; + UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs); + + /* Spatial dependencies for skip are not met if nmb > 1 */ + ASSERT(1 == u4_nmb_count); + + if(ps_proc->i4_mb_x) + { + ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra; + ps_proc->s_me_ctxt.u4_left_is_skip = + (ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == PSKIP); + } + + for(u4_i = 0; u4_i < u4_nmb_count; u4_i++) + { + /* Wait for ME map */ + if(ps_proc->i4_mb_y > 0) + { + /* Wait for top right ME to be done */ + UWORD8 *pu1_me_map_tp_rw = + ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs; + + while(1) + { + volatile UWORD8 *pu1_buf; + WORD32 idx = ps_proc->i4_mb_x + u4_i + 1; + + idx = MIN(idx, (ps_proc->i4_wd_mbs - 1)); + pu1_buf = pu1_me_map_tp_rw + idx; + if(*pu1_buf) break; + ithread_yield(); + } + } + + ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]); + ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl); + ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]); + ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]); + + ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad; + ps_proc->ps_cur_mb->u4_min_sad_reached = 0; + + ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX; + ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX; + + /* Set the best subpel buf to the correct mb so that the buffer can be + * copied */ + ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf; + ps_proc->u4_bst_spel_buf_strd = 
ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd; + + /* Set the min sad conditions */ + ps_proc->ps_cur_mb->u4_min_sad = ps_codec->u4_min_sad; + ps_proc->ps_cur_mb->u4_min_sad_reached = 0; + + isvce_derive_nghbr_avbl_of_mbs(ps_proc); + + isvce_init_me(ps_proc); + + /* Compute ME according to slice type */ + ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc); + + /* update top and left structs */ + if(u4_nmb_count > 1) + { + isvce_mb_info_t *ps_left_syn = ps_proc->s_nbr_info.ps_left_mb_info; + + ps_left_syn[0] = ps_proc->ps_mb_info[0]; + ps_left_syn[0].u1_is_intra = 0; + ps_left_syn[0].u2_mb_type = ps_proc->ps_cur_mb->u4_mb_type; + } + + /* Copy the min sad reached info */ + ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached; + ps_proc->ps_nmb_info[u4_i].u4_min_sad = ps_proc->ps_cur_mb->u4_min_sad; + + /* + * To make sure that the MV map is properly sync to the + * cache we need to do a DDB + */ + { + DATA_SYNC(); + + pu1_me_map[ps_proc->i4_mb_x] = 1; + } + ps_proc->i4_mb_x++; + + ps_proc->s_me_ctxt.u4_left_is_intra = 0; + ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type == PSKIP); + + /* update buffers pointers */ + ps_proc->s_src_buf_props.as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + MB_SIZE; + ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + MB_SIZE; + ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data) + MB_SIZE; + ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data) + MB_SIZE; + + /* + * Note: Although chroma mb size is 8, as the chroma buffers are + * interleaved, the stride per MB is MB_SIZE + */ + ps_proc->s_src_buf_props.as_component_bufs[1].pv_data = + ((UWORD8 *) 
ps_proc->s_src_buf_props.as_component_bufs[1].pv_data) + MB_SIZE; + ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data = + ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data) + MB_SIZE; + ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data) + MB_SIZE; + ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data) + MB_SIZE; + + ps_proc->pu4_mb_pu_cnt++; + ps_proc->ps_mb_info++; + } + + ps_proc->ps_mb_info = ps_mb_begin; + ps_proc->pu4_mb_pu_cnt = pu4_mb_pu_cnt_begin; + ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count; + + /* update buffers pointers */ + ps_proc->s_src_buf_props.as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) - MB_SIZE * u4_nmb_count; + ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) - MB_SIZE * u4_nmb_count; + ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data) - + MB_SIZE * u4_nmb_count; + ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data) - + MB_SIZE * u4_nmb_count; + + /* + * Note: Although chroma mb size is 8, as the chroma buffers are + * interleaved, the stride per MB is MB_SIZE + */ + ps_proc->s_src_buf_props.as_component_bufs[1].pv_data = + ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data) - MB_SIZE * u4_nmb_count; + ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data = + ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data) - MB_SIZE * u4_nmb_count; + ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data) - + MB_SIZE * u4_nmb_count; + 
ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data) - + MB_SIZE * u4_nmb_count; +} + +/** +******************************************************************************* +* +* @brief The function computes parameters for a BSKIP MB +* +* @par Description: +* The function updates the skip motion vector for B Mb, check if the Mb can be +* marked as skip and returns it +* +* @param[in] ps_proc +* Pointer to process context +* +* @param[in] u4_for_me +* Dummy +* +* @param[in] i4_reflist +* Dummy +* +* @returns Flag indicating if the current Mb can be skip or not +* +* @remarks +* The code implements the logic as described in sec 8.4.1.2.2 +* It also computes co-located MB parmas according to sec 8.4.1.2.1 +* +* Need to add condition for this fucntion to be used in ME +* +*******************************************************************************/ +WORD32 isvce_find_bskip_params_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist) +{ + /* Colzero for co-located MB */ + WORD32 i4_colzeroflag; + + /* motion vectors for neighbouring MBs */ + isvce_enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu; + + /* Variables to check if a particular mB is available */ + WORD32 i4_a, i4_b, i4_c, i4_c_avail; + + /* Mode availability, init to no modes available */ + WORD32 i4_mode_avail; + + /* mb neighbor availability */ + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + + /* Temp var */ + WORD32 i, i4_cmpl_mode, i4_skip_type = -1; + + /* + * Colocated motion vector + */ + mv_t s_mvcol; + + /* + * Colocated picture idx + */ + WORD32 i4_refidxcol; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + UNUSED(i4_reflist); + + /************************************************************************** + *Find co-located MB parameters + * See sec 8.4.1.2.1 for reference + **************************************************************************/ + { + /* + * Find the co-located Mb and update the skip and pred 
appropriately + * 1) Default colpic is forward ref : Table 8-6 + * 2) Default mb col is current MB : Table 8-8 + */ + + if(ps_proc->ps_col_mb->u1_is_intra) + { + s_mvcol.i2_mvx = 0; + s_mvcol.i2_mvy = 0; + i4_refidxcol = -1; + } + else + { + if(ps_proc->ps_col_mb->as_pu->u1_pred_mode != L1) + { + s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L0].s_mv; + i4_refidxcol = 0; + } + else + { + s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L1].s_mv; + i4_refidxcol = 0; + } + } + + /* RefPicList1[ 0 ] is marked as "used for short-term reference", as + * default */ + i4_colzeroflag = + (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1) && (ABS(s_mvcol.i2_mvy) <= 1)); + } + + /*************************************************************************** + * Evaluating skip params : Spatial Skip + **************************************************************************/ + { + /* Get the neighbouring MBS according to Section 8.4.1.2.2 */ + ps_a_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu; + ps_b_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x].as_pu; + + i4_c_avail = 0; + if(ps_ngbr_avbl->u1_mb_c) + { + ps_c_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x + 1].as_pu; + i4_c_avail = 1; + } + else + { + ps_c_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x - 1].as_pu; + i4_c_avail = ps_ngbr_avbl->u1_mb_d; + } + + i4_a = ps_ngbr_avbl->u1_mb_a; + i4_b = ps_ngbr_avbl->u1_mb_b; + i4_c = i4_c_avail; + + /* Init to no mode avail */ + i4_mode_avail = 0; + for(i = 0; i < 2; i++) + { + i4_cmpl_mode = (i == 0) ? 
L1 : L0; + + i4_mode_avail |= (i4_a && (ps_a_pu->u1_pred_mode != i4_cmpl_mode) && + (ps_a_pu->as_me_info[i].i1_ref_idx == 0)) + << i; + i4_mode_avail |= (i4_b && (ps_b_pu->u1_pred_mode != i4_cmpl_mode) && + (ps_b_pu->as_me_info[i].i1_ref_idx == 0)) + << i; + i4_mode_avail |= (i4_c && (ps_c_pu->u1_pred_mode != i4_cmpl_mode) && + (ps_c_pu->as_me_info[i].i1_ref_idx == 0)) + << i; + } + + if(i4_mode_avail == 0x3 || i4_mode_avail == 0x0) + { + i4_skip_type = BI; + } + else if(i4_mode_avail == 0x1) + { + i4_skip_type = L0; + } + else if(i4_mode_avail == 0x2) + { + i4_skip_type = L1; + } + + /* Update skip MV for L0 */ + if((i4_mode_avail & 0x1) && (!i4_colzeroflag)) + { + ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx; + ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy; + } + else + { + ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0; + ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0; + } + + /* Update skip MV for L1 */ + if((i4_mode_avail & 0x2) && (!i4_colzeroflag)) + { + ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx; + ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy; + } + else + { + ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0; + ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0; + } + } + + /*************************************************************************** + * Evaluating skip params : Temporal skip + **************************************************************************/ + { + svc_au_buf_t *ps_ref_pic[MAX_REF_PIC_CNT]; + WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor; + isvce_enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2]; + + ps_ref_pic[L0] = ps_proc->aps_ref_pic[L0]; + ps_ref_pic[L1] = ps_proc->aps_ref_pic[L1]; + + i4_tb = ps_codec->i4_poc - ps_ref_pic[L0]->i4_abs_poc; + i4_td = ps_ref_pic[L1]->i4_abs_poc - ps_ref_pic[L0]->i4_abs_poc; + + i4_tb = CLIP3(-128, 127, i4_tb); + i4_td = CLIP3(-128, 127, i4_td); + + i4_tx = (16384 + ABS(i4_td / 2)) / i4_td; + i4_dist_scale_factor = 
CLIP3(-1024, 1023, (i4_tb * i4_tx + 32) >> 6); + + /* Motion vectors taken in full pel resolution , hence -> (& 0xfffc) + * operation */ + ps_skip_mv[L0].s_mv.i2_mvx = ((i4_dist_scale_factor * s_mvcol.i2_mvx + 128) >> 8) & 0xfffc; + ps_skip_mv[L0].s_mv.i2_mvy = ((i4_dist_scale_factor * s_mvcol.i2_mvy + 128) >> 8) & 0xfffc; + + ps_skip_mv[L1].s_mv.i2_mvx = (ps_skip_mv[L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc; + ps_skip_mv[L1].s_mv.i2_mvy = (ps_skip_mv[L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc; + } + + return i4_skip_type; +} + +/** +******************************************************************************* +* +* @brief The function computes the skip motion vectoe for B mb +* +* @par Description: +* The function gives the skip motion vector for B Mb, check if the Mb can be +* marked as skip +* +* @param[in] ps_proc +* Pointer to process context +* +* @param[in] u4_for_me +* Dummy +* +* @param[in] u4_for_me +* Dummy +* +* @returns Flag indicating if the current Mb can be skip or not +* +* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264 +* specification. 
It also computes co-located MB parmas according to +*sec 8.4.1.2.1 +* +*******************************************************************************/ +WORD32 isvce_find_bskip_params(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist) +{ + WORD32 i4_colzeroflag; + + /* motion vectors */ + isvce_enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu; + + /* Syntax elem */ + isvce_mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn; + + /* Variables to check if a particular mB is available */ + WORD32 i4_a, i4_b, i4_c, i4_c_avail; + + /* Mode availability, init to no modes available */ + WORD32 i4_mode_avail; + + /* mb neighbor availability */ + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + + /* Temp var */ + WORD32 i, i4_cmpl_mode; + + UNUSED(i4_reflist); + + /************************************************************************** + *Find co-locates parameters + * See sec 8.4.1.2.1 for reference + **************************************************************************/ + { + /* + * Find the co-located Mb and update the skip and pred appropriately + * 1) Default colpic is forward ref : Table 8-6 + * 2) Default mb col is current MB : Table 8-8 + */ + + mv_t s_mvcol; + WORD32 i4_refidxcol; + + if(ps_proc->ps_col_mb->u1_is_intra) + { + s_mvcol.i2_mvx = 0; + s_mvcol.i2_mvy = 0; + i4_refidxcol = -1; + } + else + { + if(ps_proc->ps_col_mb->as_pu->u1_pred_mode != L1) + { + s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L0].s_mv; + i4_refidxcol = 0; + } + else + { + s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L1].s_mv; + i4_refidxcol = 0; + } + } + + /* RefPicList1[ 0 ] is marked as "used for short-term reference", as + * default */ + i4_colzeroflag = + (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1) && (ABS(s_mvcol.i2_mvy) <= 1)); + } + + /*************************************************************************** + * Evaluating skip params + **************************************************************************/ + /* Section 8.4.1.2.2 */ + ps_a_syn = 
ps_proc->s_nbr_info.ps_left_mb_info; + ps_a_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu; + + ps_b_syn = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x; + ps_b_pu = ps_b_syn->as_pu; + + i4_c_avail = 0; + if(ps_ngbr_avbl->u1_mb_c) + { + ps_c_syn = ps_b_syn + 1; + ps_c_pu = ps_c_syn->as_pu; + i4_c_avail = 1; + } + else + { + ps_c_syn = ps_b_syn - 1; + ps_c_pu = ps_c_syn->as_pu; + i4_c_avail = ps_ngbr_avbl->u1_mb_d; + } + + i4_a = ps_ngbr_avbl->u1_mb_a; + i4_a &= !ps_a_syn->u1_is_intra; + + i4_b = ps_ngbr_avbl->u1_mb_b; + i4_b &= !ps_b_syn->u1_is_intra; + + i4_c = i4_c_avail; + i4_c &= !ps_c_syn->u1_is_intra; + + /* Init to no mode avail */ + i4_mode_avail = 0; + for(i = 0; i < 2; i++) + { + i4_cmpl_mode = (i == 0) ? L1 : L0; + + i4_mode_avail |= (i4_a && (ps_a_pu->u1_pred_mode != i4_cmpl_mode) && + (ps_a_pu->as_me_info[i].i1_ref_idx == 0)) + << i; + i4_mode_avail |= (i4_b && (ps_b_pu->u1_pred_mode != i4_cmpl_mode) && + (ps_b_pu->as_me_info[i].i1_ref_idx == 0)) + << i; + i4_mode_avail |= (i4_c && (ps_c_pu->u1_pred_mode != i4_cmpl_mode) && + (ps_c_pu->as_me_info[i].i1_ref_idx == 0)) + << i; + } + + /* Update skip MV for L0 */ + if((i4_mode_avail & 0x1) && (!i4_colzeroflag)) + { + ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx; + ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy; + } + else + { + ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0; + ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0; + } + + /* Update skip MV for L1 */ + if((i4_mode_avail & 0x2) && (!i4_colzeroflag)) + { + ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx; + ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy; + } + else + { + ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0; + ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0; + } + + /* Now see if the ME information matches the SKIP information */ + switch(ps_proc->ps_mb_info->as_pu->u1_pred_mode) + { + case PRED_BI: + if((ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvx 
== + ps_proc->ps_skip_mv[0].s_mv.i2_mvx) && + (ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvy == + ps_proc->ps_skip_mv[0].s_mv.i2_mvy) && + (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvx == + ps_proc->ps_skip_mv[1].s_mv.i2_mvx) && + (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvy == + ps_proc->ps_skip_mv[1].s_mv.i2_mvy) && + (i4_mode_avail == 0x3 || i4_mode_avail == 0x0)) + { + return 1; + } + break; + + case PRED_L0: + if((ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvx == + ps_proc->ps_skip_mv[0].s_mv.i2_mvx) && + (ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvy == + ps_proc->ps_skip_mv[0].s_mv.i2_mvy) && + (i4_mode_avail == 0x1)) + { + return 1; + } + break; + + case PRED_L1: + if((ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvx == + ps_proc->ps_skip_mv[1].s_mv.i2_mvx) && + (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvy == + ps_proc->ps_skip_mv[1].s_mv.i2_mvy) && + (i4_mode_avail == 0x2)) + { + return 1; + } + break; + } + + return 0; +} + +/** +******************************************************************************* +* +* @brief This function computes the best motion vector among the tentative mv +* candidates chosen. +* +* @par Description: +* This function determines the position in the search window at which the +*motion estimation should begin in order to minimise the number of search +*iterations. 
+* +* @param[in] ps_mb_part +* pointer to current mb partition ctxt with respect to ME +* +* @param[in] u4_lambda_motion +* lambda motion +* +* @param[in] u4_fast_flag +* enable/disable fast sad computation +* +* @returns mv pair & corresponding distortion and cost +* +* @remarks Currently onyl 4 search candiates are supported +* +******************************************************************************* +*/ +void isvce_evaluate_bipred(isvce_me_ctxt_t *ps_me_ctxt, isvce_process_ctxt_t *ps_proc, + mb_part_ctxt *ps_mb_ctxt_bi) +{ + UWORD32 i, u4_fast_sad; + + WORD32 i4_dest_buff; + + mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv; + + UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1; + + UWORD8 *pu1_dst_buf; + + WORD32 i4_ref_l0_stride, i4_ref_l1_stride; + + WORD32 i4_mb_distortion, i4_mb_cost; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns; + + u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad; + + i4_dest_buff = 0; + for(i = 0; i < ps_me_ctxt->u4_num_candidates[BI]; i += 2) + { + pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff]; + + s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx >> 2; + s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy >> 2; + s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx >> 2; + s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy >> 2; + + ps_l0_pred_mv = &ps_proc->ps_pred_mv[L0].s_mv; + ps_l1_pred_mv = &ps_proc->ps_pred_mv[L1].s_mv; + + if((ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx & 0x3) || + (ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy & 0x3)) + { + pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf; + i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd; + } + else + { + pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[L0] + (s_l0_mv.i2_mvx) + + ((s_l0_mv.i2_mvy) * ps_me_ctxt->ai4_rec_strd[L0]); + 
i4_ref_l0_stride = ps_me_ctxt->ai4_rec_strd[L0]; + } + + if((ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx & 0x3) || + (ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy & 0x3)) + { + pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[L1].pu1_best_hpel_buf; + i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd; + } + else + { + pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[L1] + (s_l1_mv.i2_mvx) + + ((s_l1_mv.i2_mvy) * ps_me_ctxt->ai4_rec_strd[L1]); + i4_ref_l1_stride = ps_me_ctxt->ai4_rec_strd[L1]; + } + + ps_inter_pred_fxns->pf_inter_pred_luma_bilinear( + pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf, i4_ref_l0_stride, i4_ref_l1_stride, + ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE); + + ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad]( + ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf, ps_me_ctxt->i4_src_strd, + ps_me_ctxt->u4_subpel_buf_strd, INT_MAX, &i4_mb_distortion); + + /* compute cost */ + i4_mb_cost = + ps_me_ctxt + ->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx]; + i4_mb_cost += + ps_me_ctxt + ->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy]; + i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx - + ps_l1_pred_mv->i2_mvx]; + i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy - + ps_l1_pred_mv->i2_mvy]; + + i4_mb_cost -= + (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == BI) * (i == 0); + + i4_mb_cost *= ps_me_ctxt->u4_lambda_motion; + i4_mb_cost += i4_mb_distortion; + + if(i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost) + { + ps_mb_ctxt_bi->i4_srch_pos_idx = (i >> 1); + ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost; + ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion; + ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf; + i4_dest_buff = (i4_dest_buff + 1) % 2; + } + } +} + +/** +******************************************************************************* +* +* @brief This function performs motion estimation for the current mb +* +* 
@par Description: +* The current mb is compared with a list of mb's in the reference frame for +* least cost. The mb that offers least cost is chosen as predicted mb and the +* displacement of the predicted mb from index location of the current mb is +* signaled as mv. The list of the mb's that are chosen in the reference frame +* are dependent on the speed of the ME configured. +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns motion vector of the pred mb, sad, cost. +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_compute_me_multi_reflist(isvce_process_ctxt_t *ps_proc) +{ + /* Flow: per ref list evaluate skip, full pel and (if enabled) half pel ME, fold the skip + * result into the list result, optionally evaluate BI, then pick the cheapest mode and + * write its MVs, ref indices and PU geometry into ps_proc. Nothing is returned. */ + /* me ctxt */ + isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt; + + /* codec context */ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns; + + /* Temp variables for looping over ref lists */ + WORD32 i4_reflist, i4_max_reflist; + + /* source buffer for half pel generation functions */ + UWORD8 *pu1_hpel_src; + + /* quantization parameters */ + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + + /* Mb part ctxts for SKIP */ + mb_part_ctxt as_skip_mbpart[2]; + + ASSERT(1 == MAX_REF_FRAMES_PER_PRED_DIR); + + /* SAD thresholds */ + ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh; + + { + WORD32 rows_above, rows_below, columns_left, columns_right; + + /* During evaluation for motion vectors do not search through padded regions + */ + /* Obtain number of rows and columns that are effective for computing for me + * evaluation */ + rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE; + rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE; + columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE; + columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE; + + /* init srch range */ + /* NOTE : For now, let us
limit the search range by DEFAULT_MAX_SRCH_RANGE_X / + * 2 on all sides. + */ + ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1); + ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1); + ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1); + ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1); + + /* this is to facilitate fast sub pel computation with minimal loads */ + if(ps_me_ctxt->u4_enable_hpel) + { + ps_me_ctxt->i4_srch_range_w += 1; + ps_me_ctxt->i4_srch_range_e -= 1; + ps_me_ctxt->i4_srch_range_n += 1; + ps_me_ctxt->i4_srch_range_s -= 1; + } + } + + /* Compute ME and store the MVs */ + { + /*********************************************************************** + * Compute ME for lists L0 and L1 + * For L0 -> L0 skip + L0 + * for L1 -> L0 skip + L0 + L1 skip + L1 + ***********************************************************************/ + i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? 
L0 : L1; + + /* Init min SAD tracking for the current MB */ + ps_me_ctxt->u4_min_sad_reached = 0; + ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad; + + for(i4_reflist = L0; i4_reflist <= i4_max_reflist; i4_reflist++) + { + /* Get the seed motion vector candidates */ + isvce_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist); + + /* **************************************************************** + *Evaluate the SKIP for current list + * ****************************************************************/ + as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0; + as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0; + as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX; + as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX; + + if(ps_me_ctxt->i4_skip_type == i4_reflist) + { + isvce_compute_skip_cost( + ps_me_ctxt, (ime_mv_t *) (&ps_proc->ps_skip_mv[i4_reflist].s_mv), + &as_skip_mbpart[i4_reflist], ps_codec->s_cfg.u4_enable_satqd, i4_reflist, + (ps_proc->i4_slice_type == BSLICE)); + } + + as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2; + as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2; + + /****************************************************************** + * Evaluate ME For current list + *****************************************************************/ + ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0; + ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0; + ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX; + ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX; + + /* Init Hpel */ + ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL; + + /* In case we found out the minimum SAD, exit the ME eval */ + if(ps_me_ctxt->u4_min_sad_reached) + { + i4_max_reflist = i4_reflist; + break; + } + + /* Evaluate search candidates for initial mv pt */ + isvce_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist); + + /********************************************************************/ + /* full pel motion estimation */ + 
/********************************************************************/ + isvce_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist); + + /* NOTE(review): the debug macros below read ps_me_ctxt->s_mb_part whereas the rest of + * this function uses as_mb_part[i4_reflist] - confirm they still build when enabled */ + DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2), + (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2)); + + DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1); + + /* Scale the MV to qpel resolution */ + ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2; + ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2; + + if(ps_me_ctxt->u4_enable_hpel) + { + /* moving src pointer to the converged motion vector location */ + pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + + (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2) + + ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2) * + ps_me_ctxt->ai4_rec_strd[i4_reflist]); + + ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0]; + ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1]; + ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2]; + + /* Init the search position to an invalid number */ + ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3; + + /* In case a buffer is still in use by L0, replace it with the spare buffer */ + ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[L0].i4_srch_pos_idx] = + ps_proc->apu1_subpel_buffs[3]; + + ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD; + + /* half pel search is done for both sides of full pel, + * hence half_x of width x height = 17x16 is created + * starting from left half_x of converged full pel */ + pu1_hpel_src -= 1; + + /* computing half_x */ + ps_codec->pf_ih264e_sixtapfilter_horz( + pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[0], + ps_me_ctxt->ai4_rec_strd[i4_reflist], ps_me_ctxt->u4_subpel_buf_strd); + + /* + * Halfpel search is done for both sides of full pel, + * hence half_y of width x height = 16x17 is created + * starting from top half_y of converged full pel + * for half_xy top_left is required + * hence it starts from 
pu1_hpel_src = full_pel_converged_point - + * i4_rec_strd - 1 + */ + pu1_hpel_src -= ps_me_ctxt->ai4_rec_strd[i4_reflist]; + + /* computing half_y and half_xy */ + ps_codec->pf_ih264e_sixtap_filter_2dvh_vert( + pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1], + ps_me_ctxt->apu1_subpel_buffs[2], ps_me_ctxt->ai4_rec_strd[i4_reflist], + ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3, + ps_me_ctxt->u4_subpel_buf_strd); + + isvce_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist); + } + } + + /*********************************************************************** + * If a particular skip MV is giving a better cost, copy it to the corresponding + * MBPART + * In B slices this loop should go only to PREDL1: If we found min sad + * we will go to the skip ref list only + * Have to find a way to make it without too much change or new vars + **********************************************************************/ + for(i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++) + { + if(as_skip_mbpart[i4_reflist].i4_mb_cost < + ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost) + { + ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = + as_skip_mbpart[i4_reflist].i4_mb_cost; + ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = + as_skip_mbpart[i4_reflist].i4_mb_distortion; + ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr; + } + } + + /*********************************************************************** + * Compute ME for BI + * In case of BI we evaluate three L0/L1 candidate pairs + * 1) The spatial skip MVs of L0 and L1 + * 2) The temporal skip MVs of L0 and L1 + * 3) The best L0 and L1 MVs after ME + * + * TODO + * one of the search candidates is skip. 
Hence it may be duplicated + ***********************************************************************/ + if(i4_max_reflist == L1 && ps_me_ctxt->u4_min_sad_reached == 0) + { + WORD32 i, j = 0; + WORD32 l0_srch_pos_idx, l1_srch_pos_idx; + WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx; + + /* Get the free buffers */ + l0_srch_pos_idx = ps_me_ctxt->as_mb_part[L0].i4_srch_pos_idx; + l1_srch_pos_idx = ps_me_ctxt->as_mb_part[L1].i4_srch_pos_idx; + + /* Search for the two free buffers in subpel list */ + for(i = 0; i < SUBPEL_BUFF_CNT; i++) + { + if(i != l0_srch_pos_idx && i != l1_srch_pos_idx) + { + ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i]; + j++; + } + } + ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD; + + /* Copy the spatial SKIP MV of each list */ + i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[L0] - 2; + i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[L1] - 2; + ps_me_ctxt->as_mv_init_search[BI][0].i2_mvx = + ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvx << 2; + ps_me_ctxt->as_mv_init_search[BI][0].i2_mvy = + ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvy << 2; + ps_me_ctxt->as_mv_init_search[BI][1].i2_mvx = + ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvx << 2; + ps_me_ctxt->as_mv_init_search[BI][1].i2_mvy = + ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvy << 2; + + /* Copy the temporal SKIP MV of each list */ + i4_l0_skip_mv_idx++; + i4_l1_skip_mv_idx++; + ps_me_ctxt->as_mv_init_search[BI][2].i2_mvx = + ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvx << 2; + ps_me_ctxt->as_mv_init_search[BI][2].i2_mvy = + ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvy << 2; + ps_me_ctxt->as_mv_init_search[BI][3].i2_mvx = + ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvx << 2; + ps_me_ctxt->as_mv_init_search[BI][3].i2_mvy = + ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvy << 2; + + /* Copy the best MV after ME */ + ps_me_ctxt->as_mv_init_search[BI][4] = 
ps_me_ctxt->as_mb_part[L0].s_mv_curr; + ps_me_ctxt->as_mv_init_search[BI][5] = ps_me_ctxt->as_mb_part[L1].s_mv_curr; + + ps_me_ctxt->u4_num_candidates[BI] = 6; + + ps_me_ctxt->as_mb_part[BI].i4_mb_cost = INT_MAX; + ps_me_ctxt->as_mb_part[BI].i4_mb_distortion = INT_MAX; + + isvce_evaluate_bipred(ps_me_ctxt, ps_proc, &ps_me_ctxt->as_mb_part[BI]); + + i4_max_reflist = BI; + } + + /********************************************************************** + * Now get the minimum of MB part costs by searching over all ref lists + **********************************************************************/ + ps_proc->ps_mb_info->as_pu->u1_pred_mode = 0x3; + + for(i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++) + { + if(ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost) + { + ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost; + ps_proc->ps_cur_mb->i4_mb_distortion = + ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion; + ps_proc->ps_cur_mb->u4_mb_type = + (ps_proc->i4_slice_type == PSLICE) ? 
P16x16 : B16x16; + ps_proc->ps_mb_info->as_pu->u1_pred_mode = i4_reflist; + } + } + + /********************************************************************** + * In case we have a BI MB, we have to copy the buffers and set proper MV's + * 1)In case it is BI, we need to get the best MVs given by BI and update + * to their corresponding MB part + * 2)We also need to copy the buffer in which bipred buff is populated + * + * NOTE(review): the original remark at this point was left unfinished + **********************************************************************/ + if(ps_proc->ps_mb_info->as_pu->u1_pred_mode == BI) + { + WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[BI].i4_srch_pos_idx; + UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[BI].pu1_best_hpel_buf; + + ps_me_ctxt->as_mb_part[L0].s_mv_curr = + ps_me_ctxt->as_mv_init_search[BI][i4_srch_pos << 1]; + ps_me_ctxt->as_mb_part[L1].s_mv_curr = + ps_me_ctxt->as_mv_init_search[BI][(i4_srch_pos << 1) + 1]; + + /* Now we have to copy the buffers */ + ps_inter_pred_fxns->pf_inter_pred_luma_copy( + pu1_bi_buf, ps_proc->pu1_best_subpel_buf, ps_me_ctxt->u4_subpel_buf_strd, + ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL, 0); + } + /* NOTE(review): u1_pred_mode is still 0x3 here when no list beat the current MB cost, so + * the index below is then 3 - confirm as_mb_part has an entry for that index */ + else if(ps_me_ctxt->as_mb_part[ps_proc->ps_mb_info->as_pu->u1_pred_mode].pu1_best_hpel_buf) + { + /* Now we have to copy the buffers */ + ps_inter_pred_fxns->pf_inter_pred_luma_copy( + ps_me_ctxt->as_mb_part[ps_proc->ps_mb_info->as_pu->u1_pred_mode].pu1_best_hpel_buf, + ps_proc->pu1_best_subpel_buf, ps_me_ctxt->u4_subpel_buf_strd, + ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL, 0); + } + } + + /************************************************************************** + *Now copy the MVs to the current PU (they were already scaled to qpel above) + ***************************************************************************/ + ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx = + (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx); + ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy = + (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy); + 
ps_proc->ps_mb_info->as_pu->as_me_info[L1].s_mv.i2_mvx = + (ps_me_ctxt->as_mb_part[L1].s_mv_curr.i2_mvx); + ps_proc->ps_mb_info->as_pu->as_me_info[L1].s_mv.i2_mvy = + (ps_me_ctxt->as_mb_part[L1].s_mv_curr.i2_mvy); + + /* ref idx is 0 for every list the chosen pred mode uses and -1 for a list it does not use */ + ps_proc->ps_mb_info->as_pu->as_me_info[0].i1_ref_idx = + (ps_proc->ps_mb_info->as_pu->u1_pred_mode != L1) ? 0 : -1; + ps_proc->ps_mb_info->as_pu->as_me_info[1].i1_ref_idx = + (ps_proc->ps_mb_info->as_pu->u1_pred_mode != L0) ? 0 : -1; + + /* number of partitions */ + ps_proc->u4_num_sub_partitions = 1; + *(ps_proc->pu4_mb_pu_cnt) = 1; + + /* position in-terms of PU */ + ps_proc->ps_mb_info->as_pu->u1_pos_x_in_4x4 = 0; + ps_proc->ps_mb_info->as_pu->u1_pos_y_in_4x4 = 0; + + /* PU size */ + ps_proc->ps_mb_info->as_pu->u1_wd_in_4x4_m1 = 3; + ps_proc->ps_mb_info->as_pu->u1_ht_in_4x4_m1 = 3; + + /* Update min sad conditions */ + if(ps_me_ctxt->u4_min_sad_reached == 1) + { + ps_proc->ps_cur_mb->u4_min_sad_reached = 1; + ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad; + } +} diff --git a/encoder/svc/isvce_me.h b/encoder/svc/isvce_me.h new file mode 100644 index 0000000..06e3b22 --- /dev/null +++ b/encoder/svc/isvce_me.h @@ -0,0 +1,381 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** + ******************************************************************************* + * @file + * isvce_me.h + * + * @brief + * Contains macros and function declarations for motion estimation in the SVC encoder + * + * @author + * ittiam + * + * @remarks + * + ******************************************************************************* + */ + +#ifndef _ISVCE_ME_H_ +#define _ISVCE_ME_H_ + +#include "ih264_typedefs.h" + +#include "isvce_structs.h" + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** +* @brief Skip Bias value for P slice +****************************************************************************** +*/ +#define SKIP_BIAS_P 0 + +/** +****************************************************************************** +* @brief Skip Bias value for B slice +****************************************************************************** +*/ +#define SKIP_BIAS_B 0 + +/*****************************************************************************/ +/* Function Macros */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @brief compute median of 3 elements (a, b, c) and store the output + * in to result. 
This is used for mv prediction + * + * @remarks Every argument is parenthesised in the expansion but may be + * evaluated more than once, so pass side-effect free expressions. The + * expansion is a bare if/else statement: do not use it as the unbraced + * body of another if/else. + ****************************************************************************** + */ + +#define MEDIAN(a, b, c, result) \ + if((a) > (b)) \ + { \ + if((b) > (c)) \ + (result) = (b); \ + else \ + { \ + if((a) > (c)) \ + (result) = (c); \ + else \ + (result) = (a); \ + } \ + } \ + else \ + { \ + if((c) > (b)) \ + (result) = (b); \ + else \ + { \ + if((c) > (a)) \ + (result) = (c); \ + else \ + (result) = (a); \ + } \ + } + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** + ******************************************************************************* + * + * @brief + * This function populates the length of the codewords for motion vectors in + * the range (-search range, search range) in pixels + * + * @param[in] ps_me + * Pointer to me ctxt; presumably its pu1_mv_bits table is what gets populated + * with the length of the codeword for all mv's - confirm against the definition + * + * @remarks The lengths of the code words are derived from signed exponential + * golomb codes. + * + ******************************************************************************* + */ +void isvce_init_mv_bits(isvce_me_ctxt_t *ps_me); + +/** + ******************************************************************************* + * + * @brief The function computes the parameters for a P skip MB + * + * @par Description: + * The function computes the parameters for a P skip MB + * + * @param[in] ps_proc + * Process context + * + * @param[in] u4_for_me + * Flag to indicate the purpose of computing skip + * + * @param[out] ps_pred_mv + * Flag to indicate the current active reference list + * + * @returns + * 1) Updates skip MV in proc + * 2) Returns if the current MB can be coded as skip or not + * + * @remarks The code implements the logic as described in sec 8.4.1.1 in H264 + * specification. 
+ * + ******************************************************************************* + */ +FT_FIND_SKIP_PARAMS isvce_find_pskip_params; + +/** + ******************************************************************************* + * + * @brief The function computes the parameters for a P skip MB + * (presumably the variant used during ME, going by the _me suffix) + * + * @par Description: + * The function computes the parameters for a P skip MB + * + * @param[in] ps_proc + * Process context + * + * @param[in] u4_for_me + * Flag to indicate the purpose of computing skip + * + * @param[out] ps_pred_mv + * Flag to indicate the current active reference list + * + * @returns + * 1) Updates skip MV in proc + * 2) Returns if the current MB can be coded as skip or not + * + * @remarks The code implements the logic as described in sec 8.4.1.1 in H264 + * specification. + * + ******************************************************************************* + */ +FT_FIND_SKIP_PARAMS isvce_find_pskip_params_me; + +/** + ******************************************************************************* + * + * @brief The function computes the parameters for a B skip MB + * + * @par Description: + * The function computes the parameters for a B skip MB + * + * @param[in] ps_proc + * Process context + * + * @param[in] u4_for_me + * Flag to indicate the purpose of computing skip + * + * @param[out] ps_pred_mv + * Flag to indicate the current active reference list + * + * @returns + * 1) Updates skip MV in proc + * 2) Returns if the current MB can be coded as skip or not + * + * @remarks The original text cited sec 8.4.1.1 of the H264 specification, which + * covers P skip; the B skip derivation is presumably sec 8.4.1.2 - confirm. 
+ * + ******************************************************************************* + */ +FT_FIND_SKIP_PARAMS isvce_find_bskip_params; + +/** + ******************************************************************************* + * + * @brief The function computes the parameters for a B skip MB + * (presumably the variant used during ME, going by the _me suffix) + * + * @par Description: + * The function computes the parameters for a B skip MB + * + * @param[in] ps_proc + * Process context + * + * @param[in] u4_for_me + * Flag to indicate the purpose of computing skip + * + * @param[out] ps_pred_mv + * Flag to indicate the current active reference list + * + * @returns + * 1) Updates skip MV in proc + * 2) The type of SKIP [L0/L1/BI] + * + * @remarks + ******************************************************************************* + */ +FT_FIND_SKIP_PARAMS isvce_find_bskip_params_me; + +/** + ******************************************************************************* + * + * @brief motion vector predictor + * + * @par Description: + * The routine calculates the motion vector predictor for a given block, + * given the candidate MV predictors. + * + * @param ps_pred_mv + * presumably receives the computed MV predictor - confirm against the definition + * + * @param ps_neig_mv + * presumably the candidate MVs of the neighbouring blocks - confirm against the + * definition + * + * @param pred_algo + * selects the prediction algorithm; the valid values are not visible in this header + * + * @returns none (the function is declared void) + * + * @remarks The code implements the logic as described in sec 8.4.1.3 in H264 + * specification. + * Assumptions : 1. 
Assumes Only partition of size 16x16 + * + ******************************************************************************* + */ +void isvce_get_mv_predictor(isvce_enc_pu_mv_t *ps_pred_mv, isvce_enc_pu_mv_t *ps_neig_mv, + WORD32 pred_algo); + +/** + ******************************************************************************* + * + * @brief This function evaluates ME for 2 reference lists + * + * @par Description: + * It evaluates skip, full-pel and half-pel and assigns the correct MV in proc + * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +FT_ME_ALGORITHM isvce_compute_me_multi_reflist; + +/** + ******************************************************************************* + * + * @brief This function evaluates ME for single reflist [Pred L0] + * + * @par Description: + * It evaluates skip, full-pel and half-pel and assigns the correct MV in proc + * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +FT_ME_ALGORITHM isvce_compute_me_single_reflist; + +/** + ******************************************************************************* + * + * @brief This function initializes me ctxt + * + * @par Description: + * Before dispatching the current job to me thread, the me context associated + * with the job is initialized. 
+ * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void isvce_init_me(isvce_process_ctxt_t *ps_proc); + +/** + ******************************************************************************* + * + * @brief This function performs motion estimation for the current NMB + * + * @par Description: + * Initializes input and output pointers required by the function + * isvce_compute_me and calls the function isvce_compute_me in a loop to + * process NMBs. + * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @param[in] u4_nmb_count + * presumably the number of MBs to process in this call - confirm against the + * definition + * + * @returns none (the function is declared void) + * + * @remarks none + * + ******************************************************************************* + */ +void isvce_compute_me_nmb(isvce_process_ctxt_t *ps_proc, UWORD32 u4_nmb_count); + +/** + ******************************************************************************* + * + * @brief This function performs MV prediction + * + * @par Description: + * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @param[in] i4_reflist + * presumably the reference list to predict for - confirm against the definition + * + * @returns none + * + * @remarks none + * This function will update the MB availability since intra inter decision + * should be done before the call + * + ******************************************************************************* + */ +void isvce_mv_pred(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist); + +/** + ******************************************************************************* + * + * @brief This function approximates Pred. 
MV + * + * @par Description: + * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @param[in] i4_ref_list + * presumably the reference list to predict for - confirm against the definition + * + * @returns none + * + * @remarks none + * Motion estimation happens at nmb level. 
For cost calculations, mv is + * approximated using this function + * + ******************************************************************************* + */ +void isvce_mv_pred_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_ref_list); + +#endif diff --git a/encoder/svc/isvce_mode_stat_visualiser.c b/encoder/svc/isvce_mode_stat_visualiser.c new file mode 100644 index 0000000..ed4a1f2 --- /dev/null +++ b/encoder/svc/isvce_mode_stat_visualiser.c @@ -0,0 +1,191 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_mode_stat_visualiser.c +* +* @brief +* Contains functions used for synthesising analysis YUV +* +******************************************************************************* +*/ +#include "isvce_defs.h" + +#if ENABLE_MODE_STAT_VISUALISER +#include "ih264_typedefs.h" +#include "isvc_macros.h" +#include "ih264_debug.h" +#include "isvc_defs.h" +#include "isvc_structs.h" +#include "isvce_structs.h" +#include "isvce_structs.h" +#include "ih264e_fmt_conv.h" +#include "isvce_mode_stat_visualiser.h" + +#define MAX_NUM_MB_MODE_VISUALISATIONS 1 + +static const UWORD8 gau1_output_file_path[] = "out.yuv"; + +/* Blend weight of the marker colour; (1 - gd_alpha) is the weight of the source sample */ +static const double gd_alpha = 0.5; + +static const UWORD8 gau1_colors[MAX_NUM_MB_MODE_VISUALISATIONS][NUM_COMPONENTS] = { + /* Red */ + {81, 90, 240}, +}; + +/* Returns the number of bytes needed for the visualiser context plus one 8-bit 4:2:0 frame + * (u4_wd x u4_ht luma samples and two chroma planes of a quarter of that size each) */ +UWORD32 isvce_get_msv_ctxt_size(UWORD32 u4_wd, UWORD32 u4_ht) +{ + UWORD32 u4_size = sizeof(mode_stat_visualiser_t); + WORD32 i4_num_luma_samples = u4_wd * u4_ht; + WORD32 i4_num_chroma_samples = i4_num_luma_samples / 4; + + u4_size += (i4_num_luma_samples + i4_num_chroma_samples * 2) * sizeof(UWORD8); + + return u4_size; +} + +/* Carves the visualiser context and its planar frame buffer out of ps_mem_rec->pv_base, + * stores the context in ps_codec and opens the dump file */ +void isvce_msv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec) +{ + mode_stat_visualiser_t *ps_mode_stat_visualiser; + yuv_buf_props_t *ps_frame_buf; + + WORD32 i; + + UWORD32 u4_wd = ps_codec->s_cfg.u4_wd; + UWORD32 u4_ht = ps_codec->s_cfg.u4_ht; + WORD32 i4_num_luma_samples = u4_wd * u4_ht; + WORD32 i4_num_chroma_samples = i4_num_luma_samples / 4; + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + WORD64 i8_alloc_mem_size = isvce_get_msv_ctxt_size(u4_wd, u4_ht); + + ps_mode_stat_visualiser = ps_codec->ps_mode_stat_visualiser = + (mode_stat_visualiser_t *) pu1_buf; + pu1_buf += sizeof(ps_mode_stat_visualiser[0]); + i8_alloc_mem_size -= sizeof(ps_mode_stat_visualiser[0]); + + ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf; + + 
/* The dump is raw YUV, so open in binary mode to avoid newline translation */ + ps_mode_stat_visualiser->ps_output_file = fopen((const char *) gau1_output_file_path, "wb"); + ASSERT(NULL != ps_mode_stat_visualiser->ps_output_file); + + ps_frame_buf->e_color_format = IV_YUV_420P; + ps_frame_buf->u1_bit_depth = 8; + ps_frame_buf->u4_width = u4_wd; + ps_frame_buf->u4_height = u4_ht; + + for(i = 0; i < NUM_COMPONENTS; i++) + { + UWORD8 u1_is_chroma = (((COMPONENT_TYPE) i) != Y); + UWORD32 u4_buf_size = u1_is_chroma ? i4_num_chroma_samples : i4_num_luma_samples; + UWORD32 u4_stride = u4_wd >> u1_is_chroma; + + ps_frame_buf->as_component_bufs[i].pv_data = pu1_buf; + ps_frame_buf->as_component_bufs[i].i4_data_stride = u4_stride; + + pu1_buf += u4_buf_size; + i8_alloc_mem_size -= u4_buf_size; + } + + /* The planes handed out above must fit in what isvce_get_msv_ctxt_size() reported */ + ASSERT(i8_alloc_mem_size >= 0); +} + +/* Closes the dump file, if it was opened */ +void isvce_msv_ctxt_delete(mode_stat_visualiser_t *ps_mode_stat_visualiser) +{ + if(NULL != ps_mode_stat_visualiser->ps_output_file) + { + fclose(ps_mode_stat_visualiser->ps_output_file); + ps_mode_stat_visualiser->ps_output_file = NULL; + } +} + +/* Converts the highest spatial layer of the input from 420SP to the planar frame buffer of + * the visualiser */ +void isvce_msv_get_input_frame(mode_stat_visualiser_t *ps_mode_stat_visualiser, + isvce_inp_buf_t *ps_inp_buf) +{ + svc_params_t *ps_svc_params = &ps_inp_buf->s_svc_params; + yuv_buf_props_t *ps_target_layer_yuv_buf = + &ps_inp_buf->as_layer_yuv_buf_props[ps_svc_params->u1_num_spatial_layers - 1]; + yuv_buf_props_t *ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf; + + ASSERT(ps_target_layer_yuv_buf->u4_width == ps_frame_buf->u4_width); + ASSERT(ps_target_layer_yuv_buf->u4_height == ps_frame_buf->u4_height); + ASSERT(ps_target_layer_yuv_buf->u1_bit_depth == ps_frame_buf->u1_bit_depth); + ASSERT(ps_target_layer_yuv_buf->e_color_format == IV_YUV_420SP_UV); + /* The destination must be planar; this used to compare u1_bit_depth with the enum */ + ASSERT(ps_frame_buf->e_color_format == IV_YUV_420P); + /* NOTE(review): this checks the U/V strides of the 420SP source while only the destination + * U stride is passed below - confirm which buffer was intended */ + ASSERT(ps_target_layer_yuv_buf->as_component_bufs[U].i4_data_stride == + ps_target_layer_yuv_buf->as_component_bufs[V].i4_data_stride); + + /* NOTE(review): only ih264e_fmt_conv.h is included in this file - confirm it declares + * isvce_fmt_conv_420sp_to_420p */ + isvce_fmt_conv_420sp_to_420p( + ps_target_layer_yuv_buf->as_component_bufs[Y].pv_data, + ps_target_layer_yuv_buf->as_component_bufs[UV].pv_data, + ps_frame_buf->as_component_bufs[Y].pv_data, ps_frame_buf->as_component_bufs[U].pv_data, + ps_frame_buf->as_component_bufs[V].pv_data, 
ps_frame_buf->u4_width, ps_frame_buf->u4_height, + ps_target_layer_yuv_buf->as_component_bufs[Y].i4_data_stride, + ps_target_layer_yuv_buf->as_component_bufs[UV].i4_data_stride, + ps_frame_buf->as_component_bufs[Y].i4_data_stride, + ps_frame_buf->as_component_bufs[U].i4_data_stride, 1, 0); +} + +/* Tints the MB at ps_mb_pos in every plane of the frame buffer when the MB uses residual + * prediction; other MBs are left untouched */ +void isvce_msv_set_mode(mode_stat_visualiser_t *ps_mode_stat_visualiser, + isvce_mb_info_t *ps_mb_info, coordinates_t *ps_mb_pos) +{ + UWORD32 i, j, k; + + /* The flag is constant for the whole MB, so test it once instead of per sample */ + if(!ps_mb_info->u1_residual_prediction_flag) + { + return; + } + + for(i = 0; i < NUM_COMPONENTS; i++) + { + UWORD8 u1_is_chroma = (((COMPONENT_TYPE) i) != Y); + UWORD32 u4_wd = MB_SIZE >> u1_is_chroma; + UWORD32 u4_ht = MB_SIZE >> u1_is_chroma; + UWORD8 *pu1_buf = ps_mode_stat_visualiser->s_frame_buf.as_component_bufs[i].pv_data; + WORD32 i4_stride = ps_mode_stat_visualiser->s_frame_buf.as_component_bufs[i].i4_data_stride; + + pu1_buf += ps_mb_pos->i4_abscissa * u4_wd + ps_mb_pos->i4_ordinate * u4_ht * i4_stride; + + for(j = 0; j < u4_ht; j++) + { + for(k = 0; k < u4_wd; k++) + { + /* Blend the marker colour over the sample and round to nearest */ + pu1_buf[k + j * i4_stride] = + (UWORD8) (gd_alpha * gau1_colors[0][i] + + (1. 
- gd_alpha) * pu1_buf[k + j * i4_stride] + 0.5); + } + } + } +} + +/* Appends the Y, U and V planes of the frame buffer to the dump file */ +void isvce_msv_dump_visualisation(mode_stat_visualiser_t *ps_mode_stat_visualiser) +{ + WORD32 i; + + FILE *ps_output_file = ps_mode_stat_visualiser->ps_output_file; + yuv_buf_props_t *ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf; + + /* Nothing to write into when the dump file could not be opened */ + if(NULL == ps_output_file) + { + return; + } + + for(i = 0; i < NUM_COMPONENTS; i++) + { + UWORD8 u1_is_chroma = (((COMPONENT_TYPE) i) != Y); + UWORD32 u4_wd = ps_frame_buf->u4_width >> u1_is_chroma; + UWORD32 u4_ht = ps_frame_buf->u4_height >> u1_is_chroma; + UWORD32 u4_size = u4_wd * u4_ht; + + /* Each plane is written as one contiguous run, which needs stride == width */ + ASSERT(u4_wd == ps_frame_buf->as_component_bufs[i].i4_data_stride); + + fwrite(ps_frame_buf->as_component_bufs[i].pv_data, sizeof(UWORD8), u4_size, ps_output_file); + } +} +#endif diff --git a/encoder/svc/isvce_mode_stat_visualiser.h b/encoder/svc/isvce_mode_stat_visualiser.h new file mode 100644 index 0000000..3a01589 --- /dev/null +++ b/encoder/svc/isvce_mode_stat_visualiser.h @@ -0,0 +1,72 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
/* Context of the mode-stat visualiser (only built when ENABLE_MODE_STAT_VISUALISER is set) */
typedef struct mode_stat_visualiser_t
{
    /* Stream that isvce_msv_dump_visualisation() writes the frame planes to */
    FILE *ps_output_file;

    /* Frame buffer that isvce_msv_set_mode() tints and */
    /* isvce_msv_dump_visualisation() writes out */
    yuv_buf_props_t s_frame_buf;

} mode_stat_visualiser_t;

/* NOTE(review): presumably returns the memory needed for a u4_wd x u4_ht */
/* visualiser context; definition not visible here - confirm */
extern UWORD32 isvce_get_msv_ctxt_size(UWORD32 u4_wd, UWORD32 u4_ht);

extern void isvce_msv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);

extern void isvce_msv_ctxt_delete(mode_stat_visualiser_t *ps_mode_stat_visualiser);

extern void isvce_msv_get_input_frame(mode_stat_visualiser_t *ps_mode_stat_visualiser,
                                      isvce_inp_buf_t *ps_inp_buf);

/* Writes all component planes of s_frame_buf to ps_output_file */
extern void isvce_msv_dump_visualisation(mode_stat_visualiser_t *ps_mode_stat_visualiser);

/* Tints the macroblock at ps_mb_pos when its u1_residual_prediction_flag is set */
extern void isvce_msv_set_mode(mode_stat_visualiser_t *ps_mode_stat_visualiser,
                               isvce_mb_info_t *ps_mb_info, coordinates_t *ps_mb_pos);
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_nalu_stat_aggregator.c +* +* @brief +* Contains objects used for aggregating nalu statistics +* +******************************************************************************* +*/ +#include +#include + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "isvce_structs.h" +#include "isvce_nalu_stat_aggregator.h" + +void isvce_nalu_info_au_init(nalu_descriptors_t *ps_nalu_descriptor, UWORD8 u1_num_spatial_layers) +{ + WORD32 i; + + for(i = 0; i < u1_num_spatial_layers; i++) + { + ps_nalu_descriptor[i].u1_num_nalus = 0; + } +} + +void isvce_nalu_info_csv_translator(nalu_descriptors_t *ps_nalu_descriptor, + isvce_nalu_info_buf_t *ps_csv_buf) +{ + char ac_csv_string[MAX_BYTES_PER_NALU_INFO]; + WORD32 i; + + WORD64 i8_num_bytes_available = ps_csv_buf->u4_buf_size - ps_csv_buf->u4_num_bytes; + + for(i = 0; i < ps_nalu_descriptor->u1_num_nalus; i++) + { + if(ps_nalu_descriptor->as_nalu_info[i].b_is_vcl_nal) + { + snprintf(ac_csv_string, MAX_BYTES_PER_NALU_INFO, "%d,%u,%d,%d,%d,%d,%d\n", + ps_nalu_descriptor->as_nalu_info[i].e_nalu_type, + (UWORD32) (ps_nalu_descriptor->as_nalu_info[i].i8_num_bits / 8), + ps_nalu_descriptor->as_nalu_info[i].u1_spatial_layer_id, + ps_nalu_descriptor->as_nalu_info[i].u1_temporal_layer_id, + ps_nalu_descriptor->as_nalu_info[i].b_is_idr, 1, 1); + } + 
else + { + snprintf(ac_csv_string, MAX_BYTES_PER_NALU_INFO, "%d,%u,%d,%d,%d,%d,%d\n", + ps_nalu_descriptor->as_nalu_info[i].e_nalu_type, + (UWORD32) (ps_nalu_descriptor->as_nalu_info[i].i8_num_bits / 8), -1, -1, -1, + -1, -1); + } + + snprintf((char *) (ps_csv_buf->pu1_buf + ps_csv_buf->u4_num_bytes), i8_num_bytes_available, + "%s", ac_csv_string); + + ps_csv_buf->u4_num_bytes = (UWORD32) strlen((char *) ps_csv_buf->pu1_buf); + i8_num_bytes_available = ps_csv_buf->u4_buf_size - ps_csv_buf->u4_num_bytes; + + ASSERT(i8_num_bytes_available >= 0); + } +} + +nalu_info_t *isvce_get_next_nalu_info_buf(nalu_descriptors_t *ps_nalu_descriptor) +{ + return &ps_nalu_descriptor->as_nalu_info[ps_nalu_descriptor->u1_num_nalus]; +} + +void isvce_nalu_info_buf_init(nalu_info_t *ps_nalu_info, WORD64 i8_init_bits, + NAL_UNIT_TYPE_T e_nalu_type, UWORD8 u1_spatial_layer_id, + UWORD8 u1_temporal_layer_id, UWORD8 u1_num_slices, bool b_is_idr) +{ + ps_nalu_info->e_nalu_type = e_nalu_type; + ps_nalu_info->i8_num_bits = i8_init_bits; + ps_nalu_info->b_is_idr = b_is_idr; + + switch(e_nalu_type) + { + case NAL_SLICE_NON_IDR: + case NAL_SLICE_IDR: + case NAL_CODED_SLICE_EXTENSION: + { + ps_nalu_info->b_is_vcl_nal = true; + ps_nalu_info->u1_spatial_layer_id = u1_spatial_layer_id; + ps_nalu_info->u1_temporal_layer_id = u1_temporal_layer_id; + ps_nalu_info->u1_num_slices = u1_num_slices; + + break; + } + default: + { + ps_nalu_info->b_is_vcl_nal = false; + + break; + } + } +} + +void isvce_update_nalu_count(nalu_descriptors_t *ps_nalu_descriptor) +{ + ps_nalu_descriptor->u1_num_nalus++; +} diff --git a/encoder/svc/isvce_nalu_stat_aggregator.h b/encoder/svc/isvce_nalu_stat_aggregator.h new file mode 100644 index 0000000..b235218 --- /dev/null +++ b/encoder/svc/isvce_nalu_stat_aggregator.h @@ -0,0 +1,99 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_nalu_stat_aggregator.h +* +* @brief +* Contains objects used for aggregating nalu statistics +* +******************************************************************************* +*/ + +#ifndef _ISVCE_NALU_STAT_AGGREGATOR_H_ +#define _ISVCE_NALU_STAT_AGGREGATOR_H_ + +#include + +#include "ih264_typedefs.h" +#include "isvce.h" +#include "isvc_defs.h" +#include "isvce_defs.h" + +/* Macros */ +/* +1 for '\0' */ +#define MAX_BYTES_PER_NALU_INFO (45 + 1) + +/* SPS + (MAX_NUM_SPATIAL_LAYERS - 1) * SUBSET_SPS + + * MAX_NUM_SPATIAL_LAYERS * PPS + */ +/* 1 PREFIX_NALU + 1 SLICE_[NON|]IDR + (MAX_NUM_SPATIAL_LAYERS - 1) * + * CODED_SLICE_EXTENSION */ +#define MAX_NALU_PER_LAYER 10 + +/* Structs */ +typedef struct nalu_info_t +{ + NAL_UNIT_TYPE_T e_nalu_type; + + WORD64 i8_num_bits; + + bool b_is_vcl_nal; + + bool b_is_idr; + + UWORD8 u1_spatial_layer_id; + + UWORD8 u1_temporal_layer_id; + + UWORD8 u1_num_slices; +} nalu_info_t; + +typedef struct nalu_descriptors_t +{ + nalu_info_t as_nalu_info[MAX_NALU_PER_LAYER]; + + UWORD8 u1_num_nalus; + +} nalu_descriptors_t; + +/* Function declarations */ +static FORCEINLINE UWORD32 isvce_get_nalu_info_buf_size(UWORD8 u1_num_spatial_layers) +{ + return 
MAX_NALU_PER_LAYER * u1_num_spatial_layers * MAX_BYTES_PER_NALU_INFO; +} + +extern void isvce_nalu_info_au_init(nalu_descriptors_t *ps_nalu_descriptor, + UWORD8 u1_num_spatial_layers); + +extern void isvce_nalu_info_csv_translator(nalu_descriptors_t *ps_nalu_descriptor, + isvce_nalu_info_buf_t *ps_csv_buf); + +extern nalu_info_t *isvce_get_next_nalu_info_buf(nalu_descriptors_t *ps_nalu_descriptor); + +extern void isvce_nalu_info_buf_init(nalu_info_t *ps_nalu_info, WORD64 i8_init_bytes, + NAL_UNIT_TYPE_T e_nalu_type, UWORD8 u1_spatial_layer_id, + UWORD8 u1_temporal_layer_id, UWORD8 u1_num_slices, + bool b_is_idr); + +extern void isvce_update_nalu_count(nalu_descriptors_t *ps_nalu_descriptor); + +#endif diff --git a/encoder/svc/isvce_pred_structs.h b/encoder/svc/isvce_pred_structs.h new file mode 100644 index 0000000..9e255d9 --- /dev/null +++ b/encoder/svc/isvce_pred_structs.h @@ -0,0 +1,156 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
/**
 * PU information
 */
typedef struct
{
    /**
     * Motion Vector
     */
    mv_t s_mv;

    /**
     * Ref index
     */
    WORD8 i1_ref_idx;

} isvce_enc_pu_mv_t;

/*
 * Total Pu info for an MB
 */
typedef struct isvce_enc_pu_t
{
    /* Array with ME info for all lists */
    isvce_enc_pu_mv_t as_me_info[NUM_PRED_DIRS];

    /* One entry per prediction direction; presumably the index of the */
    /* chosen MV predictor - TODO confirm against the writers of this field */
    UWORD8 au1_mvp_idx[NUM_PRED_DIRS];

    /**
     * PU X position in terms of min PU (4x4) units
     */
    UWORD8 u1_pos_x_in_4x4;

    /**
     * PU Y position in terms of min PU (4x4) units
     */
    UWORD8 u1_pos_y_in_4x4;

    /**
     * PU width in pixels = (u1_wd_in_4x4_m1 + 1) << 2
     */
    UWORD8 u1_wd_in_4x4_m1;

    /**
     * PU height in pixels = (u1_ht_in_4x4_m1 + 1) << 2
     */
    UWORD8 u1_ht_in_4x4_m1;

    /**
     * PRED_L0, PRED_L1, PRED_BI
     */
    UWORD8 u1_pred_mode;

} isvce_enc_pu_t;

/* Mode pair stored per intra 4x4 block: a predicted mode and a mode */
typedef struct intra4x4_mode_data_t
{
    UWORD8 u1_predicted_mode;

    UWORD8 u1_mode;

} intra4x4_mode_data_t;

/* Intra 8x8 blocks reuse the intra 4x4 layout */
typedef intra4x4_mode_data_t intra8x8_mode_data_t;

/* Mode stored for an intra 16x16 macroblock */
typedef struct intra16x16_mode_data_t
{
    UWORD8 u1_mode;

} intra16x16_mode_data_t;

/* Intra mode data of one MB: MAX_TU_IN_MB 4x4 entries, MIN_TU_IN_MB 8x8 */
/* entries, one 16x16 entry and one chroma mode */
typedef struct enc_intra_pu_t
{
    intra4x4_mode_data_t as_i4x4_mode_data[MAX_TU_IN_MB];

    intra8x8_mode_data_t as_i8x8_mode_data[MIN_TU_IN_MB];

    intra16x16_mode_data_t s_i16x16_mode_data;

    UWORD8 u1_chroma_intra_mode;

} enc_intra_pu_t;

/* Per-MB information: up to ENC_MAX_PU_IN_MB inter PUs, the intra mode data */
/* and MB-level fields. NOTE(review): the meaning of the individual MB-level */
/* fields is not established by the code visible here - confirm before relying */
/* on the names alone */
typedef struct isvce_mb_info_t
{
    isvce_enc_pu_t as_pu[ENC_MAX_PU_IN_MB];

    enc_intra_pu_t s_intra_pu;

    UWORD32 u4_cbp;

    UWORD32 u4_csbp;

    UWORD32 u4_res_csbp;

    UWORD16 u2_mb_type;

    WORD32 i4_mb_distortion;

    UWORD8 u1_base_mode_flag;

    /* Non-zero makes isvce_msv_set_mode() tint this MB in the visualiser */
    UWORD8 u1_residual_prediction_flag;

    UWORD8 u1_tx_size;

    UWORD8 u1_mb_qp;

    UWORD8 u1_is_intra;

} isvce_mb_info_t;
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_process.c +* +* @brief +* Contains functions for codec thread +* +* @author +* Harish +* +* @par List of Functions: +* - isvce_generate_sps_pps() +* - isvce_init_entropy_ctxt() +* - isvce_entropy() +* - isvce_pack_header_data() +* - isvce_update_proc_ctxt() +* - isvce_init_proc_ctxt() +* - isvce_pad_recon_buffer() +* - isvce_dblk_n_mbs() +* - isvce_process() +* - isvce_set_rc_pic_params() +* - isvce_update_rc_post_enc() +* - isvce_isvce_isvce_process_ctxt_thread() +* +* @remarks +* None +* +******************************************************************************* +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ih264_typedefs.h" +/* Dependencies of ih264_buf_mgr.h */ +/* Dependencies of ih264_list.h */ +#include "ih264_error.h" +/* Dependencies of ih264_common_tables.h */ +#include "ih264_defs.h" +#include "ih264_structs.h" +#include "ih264_buf_mgr.h" +#include "ih264_common_tables.h" +#include "ih264_list.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +/* Dependencies of ih264e_cabac_structs.h */ +#include "ih264_cabac_tables.h" +/* Dependencies of ime_structs.h */ +#include "ime_defs.h" +#include "ime_distortion_metrics.h" +/* Dependencies of ih264e_structs.h */ +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +/* Dependencies of ih264e_bitstream.h */ +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ih264e_cabac_structs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ime_statistics.h" +#include "ime_structs.h" +/* Dependencies of 'ih264e_utils.h' */ +#include "ih264e_defs.h" +#include "ih264e_structs.h" +#include 
"ih264e_utils.h" +#include "ime.h" +#include "isvce_cabac.h" +#include "isvce_cavlc.h" +#include "isvce_deblk.h" +#include "isvce_defs.h" +#include "isvce_downscaler.h" +#include "isvce_encode_header.h" +#include "isvce_ibl_eval.h" +#include "isvce_ilp_mv.h" +#include "isvce_intra_modes_eval.h" +#include "isvce_me.h" +#include "isvce_rate_control.h" +#include "isvce_residual_pred.h" +#include "isvce_sub_pic_rc.h" +#include "isvce_utils.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief This function generates sps, pps set on request +* +* @par Description +* When the encoder is set in header generation mode, the following function +* is called. This generates sps and pps headers and returns the control back +* to caller. +* +* @param[in] ps_codec +* pointer to codec context +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T isvce_generate_sps_pps(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp_buf) +{ + sps_t *ps_sps; + pps_t *ps_pps; + subset_sps_t *ps_subset_sps; + + WORD32 i; + + isvce_process_ctxt_t *ps_proc = ps_codec->as_process; + isvce_entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; + bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; + isvce_out_buf_t *ps_out_buf = ps_codec->as_out_buf; + + UWORD8 u1_profile_idc = IH264_PROFILE_BASELINE; + + ASSERT(1 == MAX_CTXT_SETS); + + ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->as_bits_buf[ps_proc->u1_spatial_layer_id].pv_buf, + ps_out_buf->as_bits_buf[ps_proc->u1_spatial_layer_id].u4_bufsize); + + ps_sps = ps_codec->ps_sps_base; + isvce_populate_sps(ps_codec, ps_sps, 0, u1_profile_idc, ps_inp_buf, 0); + + ps_pps = ps_codec->ps_pps_base; + isvce_populate_pps(ps_codec, ps_pps, 0, 0, 
0); + + for(i = 1; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + ps_subset_sps = ps_codec->ps_subset_sps_base + i; + isvce_populate_subset_sps(ps_codec, ps_subset_sps, i, ps_inp_buf, i); + + ps_pps = ps_codec->ps_pps_base + i; + isvce_populate_pps(ps_codec, ps_pps, i, i, i); + } + + ps_entropy->i4_error_code = IH264E_SUCCESS; + + ps_entropy->i4_error_code = isvce_generate_sps(ps_bitstrm, ps_sps, NAL_SPS); + if(ps_entropy->i4_error_code != IH264E_SUCCESS) + { + return ps_entropy->i4_error_code; + } + + ps_pps = ps_codec->ps_pps_base; + ps_entropy->i4_error_code = isvce_generate_pps(ps_bitstrm, ps_pps, ps_sps); + + for(i = 1; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + ps_subset_sps = ps_codec->ps_subset_sps_base + i; + isvce_generate_subset_sps(ps_bitstrm, ps_subset_sps); + + /* populate pps header */ + ps_pps = ps_codec->ps_pps_base + i; + isvce_generate_pps(ps_bitstrm, ps_pps, &ps_subset_sps->s_sps); + } + + /* queue output buffer */ + ps_out_buf->as_bits_buf[ps_proc->u1_spatial_layer_id].u4_bytes = ps_bitstrm->u4_strm_buf_offset; + + return ps_entropy->i4_error_code; +} + +/** +******************************************************************************* +* +* @brief initialize entropy context. +* +* @par Description: +* Before invoking the call to perform to entropy coding the entropy context +* associated with the job needs to be initialized. This involves the start +* mb address, end mb address, slice index and the pointer to location at +* which the mb residue info and mb header info are packed. 
+* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_init_entropy_ctxt(isvce_process_ctxt_t *ps_proc) +{ + /* codec context */ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + /* entropy ctxt */ + isvce_entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; + + /* start address */ + ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x; + + /* end address */ + ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt; + + /* slice index */ + ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add]; + + /* sof */ + /* @ start of frame or start of a new slice, set sof flag */ + if(ps_entropy->i4_mb_start_add == 0) + { + ps_entropy->i4_sof = 1; + } + + if(ps_entropy->i4_mb_x == 0) + { + /* packed mb coeff data */ + ps_entropy->pv_mb_coeff_data = ((UWORD8 *) ps_entropy->pv_pic_mb_coeff_data) + + ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; + + /* packed mb header data */ + ps_entropy->pv_mb_header_data = ((UWORD8 *) ps_entropy->pv_pic_mb_header_data) + + ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; + } + + return IH264E_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Function to update rc context after encoding +* +* @par Description +* This function updates the rate control context after the frame is encoded. +* Number of bits consumed by the current frame, frame distortion, frame cost, +* number of intra/inter mb's, ... are passed on to rate control context for +* updating the rc model. 
/**
*******************************************************************************
*
* @brief
*  Function to update rc context after encoding
*
* @par   Description
*  This function updates the rate control context after the frame is encoded.
*  The per-thread frame statistics (mb counts, sad, qp sums, intra mb cost,
*  activity and the header/residue bit counts) of u4_num_cores process
*  contexts are accumulated and handed to isvce_rc_post_enc().
*
*  When ENABLE_RE_ENC_AS_SKIP is set and rate control asks for a post-encode
*  skip, the frame is additionally re-encoded as a skip frame, the inter-layer
*  data of the layer is reset and the reference picture is copied over the
*  current reconstruction.
*
* @param[in] ps_codec
*  Handle to codec context
*
* @param[in] ctxt_sel
*  frame context selector
*
* @param[in] i4_is_first_frm
*  forwarded unchanged to isvce_rc_post_enc()
*
* @returns  i4_stuffing_byte
*  number of stuffing bytes (if necessary)
*
* @remarks
*
*******************************************************************************
*/
WORD32 isvce_update_rc_post_enc(isvce_codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
{
    /* first process context of the selected context set */
    WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;

    isvce_process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];

#if ENABLE_RE_ENC_AS_SKIP
    isvce_entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;

    /* set by isvce_rc_post_enc() when the frame has to be re-encoded as skip */
    UWORD8 u1_is_post_enc_skip = 0;
#endif

    /* frame qp */
    UWORD8 u1_frame_qp = ps_codec->au4_frame_qp[ps_proc->u1_spatial_layer_id];

    /* cbr rc return status */
    WORD32 i4_stuffing_byte = 0;

    /* current frame stats */
    frame_info_t s_frame_info;
    picture_type_e rc_pic_type;

    /* temp var */
    WORD32 i, j;

    /********************************************************************/
    /*                            BEGIN INIT                            */
    /********************************************************************/

    /* init frame info */
    irc_init_frame_info(&s_frame_info);

    /* get frame info */
    for(i = 0; i < (WORD32) ps_codec->s_cfg.u4_num_cores; i++)
    {
        /*****************************************************************/
        /* One frame can be encoded by max of u4_num_cores threads       */
        /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
        /* u4_num_cores threads                                          */
        /*****************************************************************/
        for(j = 0; j < MAX_MB_TYPE; j++)
        {
            s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];

            s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];

            s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
        }

        s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;

        s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;

        /*****************************************************************/
        /* gather number of residue and header bits consumed by the frame*/
        /*****************************************************************/
        isvce_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
    }

    /* get pic type */
    switch(ps_codec->pic_type)
    {
        case PIC_I:
        case PIC_IDR:
            rc_pic_type = I_PIC;
            break;
        case PIC_P:
            rc_pic_type = P_PIC;
            break;
        case PIC_B:
            rc_pic_type = B_PIC;
            break;
        default:
            /* NOTE(review): if assert() is compiled out, rc_pic_type is */
            /* still unset when its address is passed on below - confirm */
            /* that pic_type can never take another value here */
            assert(0);
            break;
    }

    /* update rc lib with current frame stats */
    i4_stuffing_byte = isvce_rc_post_enc(
        ps_codec->s_rate_control.apps_rate_control_api[ps_proc->u1_spatial_layer_id],
        &(s_frame_info), ps_codec->s_rate_control.pps_pd_frm_rate,
        ps_codec->s_rate_control.pps_time_stamp, ps_codec->s_rate_control.pps_frame_time,
        (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs), &rc_pic_type, i4_is_first_frm,
        &ps_codec->s_rate_control.post_encode_skip[ctxt_sel], u1_frame_qp,
        &ps_codec->s_rate_control.ai4_num_intra_in_prev_frame[ps_proc->u1_spatial_layer_id],
        &ps_codec->s_rate_control.ai4_avg_activity[ps_proc->u1_spatial_layer_id]
#if ENABLE_RE_ENC_AS_SKIP
        ,
        &u1_is_post_enc_skip
#endif
    );

#if ENABLE_RE_ENC_AS_SKIP
    if(u1_is_post_enc_skip)
    {
        buffer_container_t s_dst;

        /* NOTE(review): shadows the function-level 'i'; 'j' used below is */
        /* the function-level one */
        WORD32 i;

        isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
        mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
        svc_ilp_data_t *ps_svc_ilp_data = &ps_codec->s_svc_ilp_data;

        UWORD8 u1_spatial_layer_id = ps_proc->u1_spatial_layer_id;
        UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;

        UWORD32 u4_wd = ps_codec->s_cfg.u4_wd;
        UWORD32 u4_ht = ps_codec->s_cfg.u4_ht;
        DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio;

        /* Luma size of this layer: the configured size divided by the */
        /* resolution ratio once per layer above this one; adding 0.99 */
        /* before truncation rounds fractional results up */
        WORD32 i4_layer_luma_wd =
            (WORD32) (((DOUBLE) u4_wd /
                       pow(d_spatial_res_ratio, u1_num_spatial_layers - u1_spatial_layer_id - 1)) +
                      0.99);
        WORD32 i4_layer_luma_ht =
            (WORD32) (((DOUBLE) u4_ht /
                       pow(d_spatial_res_ratio, u1_num_spatial_layers - u1_spatial_layer_id - 1)) +
                      0.99);

        /* rewrite the frame as a skip frame with the active entropy coder */
        if(CABAC == ps_entropy->u1_entropy_coding_mode_flag)
        {
            isvce_reencode_as_skip_frame_cabac(ps_entropy);
        }
        else
        {
            isvce_reencode_as_skip_frame_cavlc(ps_entropy);
        }

        if(u1_num_spatial_layers > 1)
        {
            /* record every mb of the layer as PSKIP */
            for(i = 0; i < ps_proc->i4_ht_mbs; i++)
            {
                for(j = 0; j < ps_proc->i4_wd_mbs; j++)
                {
                    isvce_update_ibl_info(ps_proc->ps_intra_pred_ctxt, u1_num_spatial_layers,
                                          u1_spatial_layer_id, PSKIP, j, i, 0);
                }
            }

            if(ENABLE_ILP_MV)
            {
                svc_layer_data_t *ps_layer_data;
                coordinates_t s_ref_frame_dims;

                WORD32 i4_num_mbs;

                svc_au_data_t *ps_svc_au_data = ps_svc_ilp_data->ps_svc_au_data;

                s_ref_frame_dims.i4_abscissa =
                    ps_svc_ilp_data->ps_residual_bufs[ps_entropy->u1_spatial_layer_id].u4_width;
                s_ref_frame_dims.i4_ordinate =
                    ps_svc_ilp_data->ps_residual_bufs[ps_entropy->u1_spatial_layer_id].u4_height;
                i4_num_mbs = (s_ref_frame_dims.i4_abscissa * s_ref_frame_dims.i4_ordinate) /
                             (MB_SIZE * MB_SIZE);
                ps_layer_data = &ps_svc_au_data->ps_svc_layer_data[ps_entropy->u1_spatial_layer_id];

                /* zero the stored mb info of the layer and set one PU per mb */
                memset(ps_layer_data->ps_mb_info, 0,
                       i4_num_mbs * sizeof(ps_layer_data->ps_mb_info[0]));

                for(i = 0; i < i4_num_mbs; i++)
                {
                    ps_layer_data->pu4_num_pus_in_mb[i] = 1;
                }
            }
        }

        for(i = 0; i < NUM_SP_COMPONENTS; i++)
        {
            UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i));
            WORD32 i4_src_strd = ps_proc->aps_ref_pic[0]
                                     ->ps_layer_yuv_buf_props[u1_spatial_layer_id]
                                     .as_component_bufs[i]
                                     .i4_data_stride;
            WORD32 i4_dst_strd = ps_proc->ps_cur_pic->ps_layer_yuv_buf_props[u1_spatial_layer_id]
                                     .as_component_bufs[i]
                                     .i4_data_stride;

            /* every layer except the topmost one: clear its intra recon */
            /* (and, if enabled, residual) inter-layer buffers */
            if(u1_spatial_layer_id < (u1_num_spatial_layers - 1))
            {
                s_dst.i4_data_stride = ps_svc_ilp_data->ps_intra_recon_bufs[u1_spatial_layer_id]
                                           .as_component_bufs[i]
                                           .i4_data_stride;
                s_dst.pv_data =
                    ((UWORD8 *) ps_svc_ilp_data->ps_intra_recon_bufs[u1_spatial_layer_id]
                         .as_component_bufs[i]
                         .pv_data);

                ps_mem_fxns->pf_memset_2d((UWORD8 *) s_dst.pv_data, s_dst.i4_data_stride, 0,
                                          i4_layer_luma_wd, (i4_layer_luma_ht >> u1_is_chroma));

                if(ENABLE_RESIDUAL_PREDICTION)
                {
                    WORD16 *pi2_res;
                    yuv_buf_props_t *ps_residual_buf =
                        &ps_codec->s_svc_ilp_data.ps_residual_bufs[u1_spatial_layer_id];

                    /* NOTE(review): indexed with u1_is_chroma while the */
                    /* buffers around it are indexed with i - confirm the */
                    /* two are meant to be interchangeable */
                    pi2_res = ps_residual_buf->as_component_bufs[u1_is_chroma].pv_data;

                    /* stride is scaled by sizeof(WORD16) because the */
                    /* memset works on bytes */
                    ps_mem_fxns->pf_memset_2d(
                        (UWORD8 *) pi2_res,
                        ps_residual_buf->as_component_bufs[u1_is_chroma].i4_data_stride *
                            (sizeof(WORD16) / sizeof(UWORD8)),
                        0,
                        ps_residual_buf->as_component_bufs[u1_is_chroma].i4_data_stride *
                            (sizeof(WORD16) / sizeof(UWORD8)),
                        i4_layer_luma_ht >> u1_is_chroma);
                }
            }

            /* copy the reference picture, padding included, over the */
            /* current picture; both pointers are moved back by PAD_LEFT */
            /* columns and PAD_TOP rows first */
            ps_mem_fxns->pf_copy_2d(
                (UWORD8 *) (ps_proc->ps_cur_pic->ps_layer_yuv_buf_props[u1_spatial_layer_id]
                                .as_component_bufs[i]
                                .pv_data) -
                    PAD_LEFT - (PAD_TOP * i4_dst_strd),
                i4_dst_strd,
                (UWORD8 *) (ps_proc->aps_ref_pic[0]
                                ->ps_layer_yuv_buf_props[u1_spatial_layer_id]
                                .as_component_bufs[i]
                                .pv_data) -
                    PAD_LEFT - (PAD_TOP * i4_src_strd),
                i4_src_strd, (i4_layer_luma_wd + PAD_WD),
                (i4_layer_luma_ht >> u1_is_chroma) + PAD_HT);
        }

        /* NOTE(review): macro defined elsewhere; presumably returns early */
        /* when the entropy context holds an error - confirm */
        RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
    }

#endif
    return i4_stuffing_byte;
}
+* This process is repeated till all the rows or group of mb's (which ever is +* minimum) are coded +* +* @param[in] ps_proc +* process context +* +* @returns error status +* +* @remarks +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_entropy(isvce_process_ctxt_t *ps_proc) +{ + svc_nalu_ext_t *aps_svc_nalu_ext[2]; + isvce_out_buf_t s_out_buf; + sei_params_t s_sei; + nalu_info_t *ps_slice_nalu_info; + nalu_info_t *ps_non_vcl_nalu_info; + + UWORD8 *pu1_proc_map; + UWORD8 *pu1_entropy_map_curr; + WORD32 i4_wd_mbs, i4_ht_mbs; + UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx, u4_insert_per_idr; + WORD32 bitstream_start_offset, bitstream_end_offset; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + isvce_entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; + isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac; + sps_t *ps_sps = ps_entropy->ps_sps_base; + subset_sps_t *ps_subset_sps = ps_entropy->ps_subset_sps_base; + pps_t *ps_pps = ps_entropy->ps_pps_base; + slice_header_t *ps_slice_hdr = + ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT); + svc_slice_header_t *ps_svc_slice_hdr = NULL; + bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; +#if ENABLE_RE_ENC_AS_SKIP + bitstrm_t *ps_bitstrm_after_slice_hdr = ps_entropy->ps_bitstrm_after_slice_hdr; +#endif + nalu_descriptors_t *ps_nalu_descriptor = + &ps_codec->as_nalu_descriptors[ps_proc->u1_spatial_layer_id]; + + WORD32 i4_slice_type = ps_proc->i4_slice_type; + WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS; + + aps_svc_nalu_ext[0] = + ps_entropy->ps_svc_nalu_ext_base + (ps_entropy->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT); + aps_svc_nalu_ext[1] = ps_entropy->ps_svc_nalu_ext_base + 1 + + (ps_entropy->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT); + + /********************************************************************/ + /* BEGIN INIT */ + /********************************************************************/ 
+ + /* entropy encode start address */ + u4_mb_idx = ps_entropy->i4_mb_start_add; + + /* entropy encode end address */ + u4_mb_end_idx = ps_entropy->i4_mb_end_add; + + /* width in mbs */ + i4_wd_mbs = ps_entropy->i4_wd_mbs; + + /* height in mbs */ + i4_ht_mbs = ps_entropy->i4_ht_mbs; + + /* total mb cnt */ + u4_mb_cnt = i4_wd_mbs * i4_ht_mbs; + + /* proc map */ + pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; + + /* entropy map */ + pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; + + /********************************************************************/ + /* @ start of frame / slice, */ + /* initialize the output buffer, */ + /* initialize the bit stream buffer, */ + /* check if sps and pps headers have to be generated, */ + /* populate and generate slice header */ + /********************************************************************/ + if(ps_entropy->i4_sof) + { + /********************************************************************/ + /* initialize the output buffer */ + /********************************************************************/ + s_out_buf = ps_codec->as_out_buf[ctxt_sel]; + + /* is last frame to encode */ + s_out_buf.u4_is_last = ps_entropy->u4_is_last; + + /* frame idx */ + s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high; + s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low; + + /********************************************************************/ + /* initialize the bit stream buffer */ + /********************************************************************/ + ih264e_bitstrm_init(ps_bitstrm, s_out_buf.as_bits_buf[ps_proc->u1_spatial_layer_id].pv_buf, + s_out_buf.as_bits_buf[ps_proc->u1_spatial_layer_id].u4_bufsize); + + /********************************************************************/ + /* BEGIN HEADER GENERATION */ + /********************************************************************/ + if(1 == ps_entropy->i4_gen_header) + { + WORD32 i; + + 
ps_non_vcl_nalu_info = isvce_get_next_nalu_info_buf(ps_nalu_descriptor); + isvce_nalu_info_buf_init(ps_non_vcl_nalu_info, + -((WORD32) isvce_get_num_bits(ps_bitstrm)), NAL_SPS, + ps_proc->u1_spatial_layer_id, + ps_proc->ps_cur_pic->i1_temporal_id, 1, !!ps_proc->u4_is_idr); + + ps_entropy->i4_error_code = isvce_generate_sps(ps_bitstrm, ps_sps, NAL_SPS); + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + + ps_non_vcl_nalu_info->i8_num_bits += isvce_get_num_bits(ps_bitstrm); + isvce_update_nalu_count(ps_nalu_descriptor); + + for(i = 1; i < ps_proc->s_svc_params.u1_num_spatial_layers; i++) + { + ps_subset_sps = ps_entropy->ps_subset_sps_base + i; + + ps_non_vcl_nalu_info = isvce_get_next_nalu_info_buf(ps_nalu_descriptor); + isvce_nalu_info_buf_init( + ps_non_vcl_nalu_info, -((WORD32) isvce_get_num_bits(ps_bitstrm)), + NAL_SUBSET_SPS, ps_proc->u1_spatial_layer_id, + ps_proc->ps_cur_pic->i1_temporal_id, 1, !!ps_proc->u4_is_idr); + + ps_entropy->i4_error_code = isvce_generate_subset_sps(ps_bitstrm, ps_subset_sps); + + ps_non_vcl_nalu_info->i8_num_bits += isvce_get_num_bits(ps_bitstrm); + isvce_update_nalu_count(ps_nalu_descriptor); + } + + ps_non_vcl_nalu_info = isvce_get_next_nalu_info_buf(ps_nalu_descriptor); + isvce_nalu_info_buf_init(ps_non_vcl_nalu_info, + -((WORD32) isvce_get_num_bits(ps_bitstrm)), NAL_PPS, + ps_proc->u1_spatial_layer_id, + ps_proc->ps_cur_pic->i1_temporal_id, 1, !!ps_proc->u4_is_idr); + + ps_entropy->i4_error_code = isvce_generate_pps(ps_bitstrm, ps_pps, ps_sps); + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + + ps_non_vcl_nalu_info->i8_num_bits += isvce_get_num_bits(ps_bitstrm); + isvce_update_nalu_count(ps_nalu_descriptor); + + for(i = 1; i < ps_proc->s_svc_params.u1_num_spatial_layers; i++) + { + ps_pps = ps_entropy->ps_pps_base + i; + ps_subset_sps = ps_entropy->ps_subset_sps_base + i; + + ps_non_vcl_nalu_info = isvce_get_next_nalu_info_buf(ps_nalu_descriptor); + isvce_nalu_info_buf_init( + ps_non_vcl_nalu_info, 
-((WORD32) isvce_get_num_bits(ps_bitstrm)), NAL_PPS, + ps_proc->u1_spatial_layer_id, ps_proc->ps_cur_pic->i1_temporal_id, 1, + !!ps_proc->u4_is_idr); + + ps_entropy->i4_error_code = + isvce_generate_pps(ps_bitstrm, ps_pps, &ps_subset_sps->s_sps); + + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + + ps_non_vcl_nalu_info->i8_num_bits += isvce_get_num_bits(ps_bitstrm); + isvce_update_nalu_count(ps_nalu_descriptor); + } + + ps_entropy->i4_gen_header = 0; + } + + ps_svc_slice_hdr = ps_entropy->ps_svc_slice_hdr_base + + (ps_entropy->i4_cur_slice_idx % SVC_MAX_SLICE_HDR_CNT); + + if((ps_codec->s_cfg.s_svc_params.u1_num_temporal_layers > 1) || + (ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers > 1)) + { + isvce_populate_svc_nalu_extension(ps_proc, aps_svc_nalu_ext[0], NAL_PREFIX, + ps_proc->u4_is_idr); + + if(ps_proc->u1_spatial_layer_id > 0) + { + isvce_populate_svc_nalu_extension(ps_proc, aps_svc_nalu_ext[1], + NAL_CODED_SLICE_EXTENSION, ps_proc->u4_is_idr); + } + } + else + { + isvce_populate_svc_nalu_extension(ps_proc, aps_svc_nalu_ext[0], NAL_PREFIX, + ps_proc->u4_is_idr); + } + + if(ps_proc->u1_spatial_layer_id > 0) + { + ps_subset_sps = ps_entropy->ps_subset_sps_base + ps_proc->u1_spatial_layer_id; + ps_pps = ps_entropy->ps_pps_base + ps_proc->u1_spatial_layer_id; + + ps_entropy->i4_error_code = isvce_populate_svc_slice( + ps_proc, ps_svc_slice_hdr, ps_pps, ps_subset_sps, aps_svc_nalu_ext[1]); + + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + + ps_slice_hdr = &ps_svc_slice_hdr->s_slice_header; + } + else + { + ps_pps = ps_entropy->ps_pps_base; + ps_sps = ps_entropy->ps_sps_base; + + ps_entropy->i4_error_code = isvce_populate_slice_header( + ps_proc, ps_slice_hdr, ps_pps, ps_sps, aps_svc_nalu_ext[0]->u1_idr_flag); + + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + } + + /* generate sei */ + u4_insert_per_idr = (NAL_SLICE_IDR == ps_slice_hdr->i1_nal_unit_type); + + memset(&s_sei, 0, sizeof(sei_params_t)); + 
s_sei.u1_sei_mdcv_params_present_flag = + ps_codec->s_cfg.s_sei.u1_sei_mdcv_params_present_flag; + s_sei.s_sei_mdcv_params = ps_codec->s_cfg.s_sei.s_sei_mdcv_params; + s_sei.u1_sei_cll_params_present_flag = ps_codec->s_cfg.s_sei.u1_sei_cll_params_present_flag; + s_sei.s_sei_cll_params = ps_codec->s_cfg.s_sei.s_sei_cll_params; + s_sei.u1_sei_ave_params_present_flag = ps_codec->s_cfg.s_sei.u1_sei_ave_params_present_flag; + s_sei.s_sei_ave_params = ps_codec->s_cfg.s_sei.s_sei_ave_params; + s_sei.u1_sei_ccv_params_present_flag = 0; + s_sei.s_sei_ccv_params = + ps_codec->as_inp_list[ps_codec->i4_poc % SVC_MAX_NUM_INP_FRAMES].s_inp_props.s_sei_ccv; + + if((1 == ps_sps->i1_vui_parameters_present_flag) && + (1 == ps_codec->s_cfg.s_vui.u1_video_signal_type_present_flag) && + (1 == ps_codec->s_cfg.s_vui.u1_colour_description_present_flag) && + (2 != ps_codec->s_cfg.s_vui.u1_colour_primaries) && + (2 != ps_codec->s_cfg.s_vui.u1_matrix_coefficients) && + (2 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) && + (4 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) && + (5 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics)) + { + s_sei.u1_sei_ccv_params_present_flag = + ps_codec->as_inp_list[ps_codec->i4_poc % SVC_MAX_NUM_INP_FRAMES] + .s_inp_props.u1_sei_ccv_params_present_flag; + } + + if((1 == s_sei.u1_sei_mdcv_params_present_flag && u4_insert_per_idr) || + (1 == s_sei.u1_sei_cll_params_present_flag && u4_insert_per_idr) || + (1 == s_sei.u1_sei_ave_params_present_flag && u4_insert_per_idr) || + (1 == s_sei.u1_sei_ccv_params_present_flag)) + { + ps_non_vcl_nalu_info = isvce_get_next_nalu_info_buf(ps_nalu_descriptor); + isvce_nalu_info_buf_init(ps_non_vcl_nalu_info, + -((WORD32) isvce_get_num_bits(ps_bitstrm)), NAL_SEI, + ps_proc->u1_spatial_layer_id, + ps_proc->ps_cur_pic->i1_temporal_id, 1, !!ps_proc->u4_is_idr); + + ps_entropy->i4_error_code = ih264e_generate_sei(ps_bitstrm, &s_sei, u4_insert_per_idr); + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + + 
ps_non_vcl_nalu_info->i8_num_bits += isvce_get_num_bits(ps_bitstrm); + isvce_update_nalu_count(ps_nalu_descriptor); + } + + ps_codec->as_inp_list[ps_codec->i4_poc % SVC_MAX_NUM_INP_FRAMES] + .s_inp_props.u1_sei_ccv_params_present_flag = 0; + + if((ps_proc->u1_spatial_layer_id == 0) && + (ps_codec->s_cfg.s_svc_params.u1_num_temporal_layers > 1 || + ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers > 1)) + { + ps_non_vcl_nalu_info = isvce_get_next_nalu_info_buf(ps_nalu_descriptor); + isvce_nalu_info_buf_init(ps_non_vcl_nalu_info, + -((WORD32) isvce_get_num_bits(ps_bitstrm)), NAL_PREFIX, + ps_proc->u1_spatial_layer_id, + ps_proc->ps_cur_pic->i1_temporal_id, 1, !!ps_proc->u4_is_idr); + + ps_entropy->i4_error_code = + isvce_generate_svc_nalu_extension(ps_bitstrm, aps_svc_nalu_ext[0], NAL_PREFIX); + + ps_entropy->i4_error_code = isvce_generate_prefix_nal( + ps_bitstrm, aps_svc_nalu_ext[0], ps_slice_hdr, ps_sps->u1_max_num_ref_frames, + ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers); + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + + ps_non_vcl_nalu_info->i8_num_bits += isvce_get_num_bits(ps_bitstrm); + isvce_update_nalu_count(ps_nalu_descriptor); + } + + ps_slice_nalu_info = isvce_get_next_nalu_info_buf(ps_nalu_descriptor); + isvce_nalu_info_buf_init(ps_slice_nalu_info, -((WORD32) isvce_get_num_bits(ps_bitstrm)), + ps_slice_hdr->i1_nal_unit_type, ps_proc->u1_spatial_layer_id, + ps_proc->ps_cur_pic->i1_temporal_id, 1, !!ps_proc->u4_is_idr); + + if(ps_proc->u1_spatial_layer_id > 0) + { + ps_subset_sps = ps_entropy->ps_subset_sps_base + ps_proc->u1_spatial_layer_id; + ps_pps = ps_entropy->ps_pps_base + ps_proc->u1_spatial_layer_id; + + ps_entropy->i4_error_code = isvce_generate_svc_nalu_extension( + ps_bitstrm, aps_svc_nalu_ext[1], NAL_CODED_SLICE_EXTENSION); + + ps_entropy->i4_error_code = isvce_generate_slice_header_svc( + ps_bitstrm, ps_pps, aps_svc_nalu_ext[1], ps_svc_slice_hdr, ps_subset_sps); + + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, 
ctxt_sel); + } + else + { + /* generate slice header */ + ps_entropy->i4_error_code = isvce_generate_slice_header( + ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps, aps_svc_nalu_ext[0]->u1_idr_flag); + + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + } + + /* once start of frame / slice is done, you can reset it */ + /* it is the responsibility of the caller to set this flag */ + ps_entropy->i4_sof = 0; + + if(CABAC == ps_entropy->u1_entropy_coding_mode_flag) + { + BITSTREAM_BYTE_ALIGN(ps_bitstrm); + BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code); + isvce_init_cabac_ctxt(ps_entropy, ps_slice_hdr); + } + +#if ENABLE_RE_ENC_AS_SKIP + ps_bitstrm_after_slice_hdr[0] = ps_bitstrm[0]; +#endif + } + + /* begin entropy coding for the mb set */ + while(u4_mb_idx < u4_mb_end_idx) + { + mb_bits_info_t s_mb_bits = { + .i8_header_bits = -((WORD64) ps_entropy->u4_header_bits[i4_slice_type == PSLICE]), + .i8_texture_bits = -((WORD64) ps_entropy->u4_residue_bits[i4_slice_type == PSLICE])}; + + /* init ptrs/indices */ + if(ps_entropy->i4_mb_x == i4_wd_mbs) + { + ps_entropy->i4_mb_y++; + ps_entropy->i4_mb_x = 0; + + /* packed mb coeff data */ + ps_entropy->pv_mb_coeff_data = ((UWORD8 *) ps_entropy->pv_pic_mb_coeff_data) + + ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; + + /* packed mb header data */ + ps_entropy->pv_mb_header_data = ((UWORD8 *) ps_entropy->pv_pic_mb_header_data) + + ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; + + /* proc map */ + pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; + + /* entropy map */ + pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; + } + + DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y); + ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x); + ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y); + + /* wait until the curr mb is core coded */ + /* The wait for curr mb to be core coded is essential when entropy is + * launched as a 
separate job + */ + while(1) + { + volatile UWORD8 *pu1_buf1; + WORD32 idx = ps_entropy->i4_mb_x; + + pu1_buf1 = pu1_proc_map + idx; + if(*pu1_buf1) break; + ithread_yield(); + } + + /* write mb layer */ + ps_entropy->i4_error_code = + ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag] + [i4_slice_type](ps_entropy); + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = isvce_get_num_bits(ps_bitstrm); + + /* set entropy map */ + pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1; + ASSERT(ps_entropy->i4_mb_x < i4_wd_mbs); + + u4_mb_idx++; + ps_entropy->i4_mb_x++; + /* check for eof */ + if(CABAC == ps_entropy->u1_entropy_coding_mode_flag) + { + if(ps_entropy->i4_mb_x < i4_wd_mbs) + { + isvce_cabac_encode_terminate(ps_cabac_ctxt, 0); + } + } + + if(ps_entropy->i4_mb_x == i4_wd_mbs) + { + /* if slices are enabled */ + if(ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS) + { + /* current slice index */ + WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx; + + /* slice map */ + UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx; + + /* No need to open a slice at end of frame. The current slice can be + * closed at the time of signaling eof flag. 
+ */ + if((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx != pu1_slice_idx[u4_mb_idx])) + { + if(CAVLC == ps_entropy->u1_entropy_coding_mode_flag) + { /* mb skip run */ + if((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) + { + if(*ps_entropy->pi4_mb_skip_run) + { + PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, + ps_entropy->i4_error_code, "mb skip run"); + *ps_entropy->pi4_mb_skip_run = 0; + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + } + } + /* put rbsp trailing bits for the previous slice */ + ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm); + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + } + else + { + isvce_cabac_encode_terminate(ps_cabac_ctxt, 1); + } + + /* update slice header pointer */ + i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx]; + ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx; + ps_slice_hdr = + ps_entropy->ps_slice_hdr_base + (i4_curr_slice_idx % SVC_MAX_SLICE_HDR_CNT); + + ps_entropy->u1_spatial_layer_id = ps_proc->u1_spatial_layer_id; + + /* populate slice header */ + ps_entropy->i4_mb_start_add = u4_mb_idx; + + /* generate slice header */ + if(ps_proc->u1_spatial_layer_id > 0) + { + ps_entropy->i4_error_code = + isvce_generate_slice_header_svc(ps_bitstrm, ps_pps, aps_svc_nalu_ext[1], + ps_svc_slice_hdr, ps_subset_sps); + + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + + ps_slice_hdr = &ps_svc_slice_hdr->s_slice_header; + } + else + { + ps_entropy->i4_error_code = + isvce_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps, + aps_svc_nalu_ext[0]->u1_idr_flag); + + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + + ps_entropy->i4_error_code = + isvce_generate_slice_header(ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps, + aps_svc_nalu_ext[0]->u1_idr_flag); + + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + } + + if(CABAC == ps_entropy->u1_entropy_coding_mode_flag) + { + BITSTREAM_BYTE_ALIGN(ps_bitstrm); + BITSTREAM_FLUSH(ps_bitstrm, 
ps_entropy->i4_error_code); + isvce_init_cabac_ctxt(ps_entropy, ps_slice_hdr); + } + } + else + { + if(CABAC == ps_entropy->u1_entropy_coding_mode_flag && u4_mb_idx != u4_mb_cnt) + { + isvce_cabac_encode_terminate(ps_cabac_ctxt, 0); + } + } + } + } + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = isvce_get_num_bits(ps_bitstrm); + ps_entropy->u4_header_bits[i4_slice_type == PSLICE] += + bitstream_end_offset - bitstream_start_offset; + + { + svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt = ps_proc->ps_sub_pic_rc_ctxt; + svc_sub_pic_rc_entropy_variables_t *ps_sub_pic_rc_variables = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_entropy_variables; + + s_mb_bits.i8_header_bits += ps_entropy->u4_header_bits[i4_slice_type == PSLICE]; + s_mb_bits.i8_texture_bits += ps_entropy->u4_residue_bits[i4_slice_type == PSLICE]; + + ps_sub_pic_rc_variables->s_mb_bits = s_mb_bits; + ps_sub_pic_rc_variables->u1_spatial_layer_id = ps_proc->u1_spatial_layer_id; + ps_sub_pic_rc_variables->s_mb_pos.i4_abscissa = ps_entropy->i4_mb_x - 1; + ps_sub_pic_rc_variables->s_mb_pos.i4_ordinate = ps_entropy->i4_mb_y; + + isvce_sub_pic_rc_get_entropy_data(ps_proc->ps_sub_pic_rc_ctxt); + } + } + + /* check for eof */ + if(u4_mb_idx == u4_mb_cnt) + { + /* set end of frame flag */ + ps_entropy->i4_eof = 1; + } + else + { + if(CABAC == ps_entropy->u1_entropy_coding_mode_flag && + ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_BLOCKS) + { + isvce_cabac_encode_terminate(ps_cabac_ctxt, 0); + } + } + + if(ps_entropy->i4_eof) + { + if(CAVLC == ps_entropy->u1_entropy_coding_mode_flag) + { + /* mb skip run */ + if((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) + { + if(*ps_entropy->pi4_mb_skip_run) + { + PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, + ps_entropy->i4_error_code, "mb skip run"); + *ps_entropy->pi4_mb_skip_run = 0; + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + } + } + /* put rbsp trailing bits */ + ps_entropy->i4_error_code = 
ih264e_put_rbsp_trailing_bits(ps_bitstrm); + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + } + else + { + isvce_cabac_encode_terminate(ps_cabac_ctxt, 1); + } + + /* update current frame stats to rc library */ + /* number of bytes to stuff */ + { + WORD32 i4_stuff_bytes; + + /* update */ + i4_stuff_bytes = isvce_update_rc_post_enc(ps_codec, ctxt_sel, (ps_codec->i4_poc == 0)); + + if(ps_proc->u1_spatial_layer_id == (ps_proc->s_svc_params.u1_num_spatial_layers - 1)) + { + /* cbr rc - house keeping */ + if(ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) + { + ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0; + } + else if(i4_stuff_bytes > 0) + { + /* add filler nal units */ + ps_entropy->i4_error_code = + ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes); + RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel); + } + } + } + + /* + *Frame number is to be incremented only if the current frame is a + * reference frame. After each successful frame encode, we increment + * frame number by 1 + */ + if(!ps_codec->s_rate_control.post_encode_skip[ctxt_sel] && ps_codec->u4_is_curr_frm_ref && + (ps_proc->u1_spatial_layer_id == ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers - 1)) + { + ps_codec->i4_frame_num++; + } + + /********************************************************************/ + /* signal the output */ + /********************************************************************/ + ps_codec->as_out_buf[ctxt_sel].as_bits_buf[ps_entropy->u1_spatial_layer_id].u4_bytes = + ps_bitstrm->u4_strm_buf_offset; + + ps_slice_nalu_info = isvce_get_next_nalu_info_buf(ps_nalu_descriptor); + ps_slice_nalu_info->i8_num_bits += isvce_get_num_bits(ps_bitstrm); + isvce_update_nalu_count(ps_nalu_descriptor); + + DEBUG("entropy status %x", ps_entropy->i4_error_code); + ps_entropy->i4_eof = 0; + } + + /* Dont execute any further instructions until store synchronization took + * place */ + DATA_SYNC(); + + /* allow threads to dequeue entropy jobs */ + 
ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
+
+    return ps_entropy->i4_error_code;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Writes the syntax elements of the current mb in to the mb header
+* buffer
+*
+* @par Description:
+* Once the mode info of a macroblock has been decided, the syntax elements
+* associated with it are stored as one header record at
+* ps_proc->pv_mb_header_data. The layout of the record depends on the mb type.
+* After the record is written the pointer is moved past it. The entropy thread
+* unpacks this buffer and generates the end bit stream.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns IH264E_SUCCESS in all cases. An mb type that matches none of the
+* types handled here leaves both the buffer and the pointer untouched.
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_pack_header_data(isvce_process_ctxt_t *ps_proc)
+{
+    /* start of the record of the curr mb, viewed as bytes */
+    UWORD8 *pu1_hdr = ps_proc->pv_mb_header_data;
+
+    /* type of the curr mb, selects the record layout */
+    UWORD32 u4_mb_type = ps_proc->ps_mb_info->u2_mb_type;
+
+    if(I4x4 == u4_mb_type)
+    {
+        isvce_mb_hdr_i4x4_t *ps_i4x4_hdr = (isvce_mb_hdr_i4x4_t *) ps_proc->pv_mb_header_data;
+        WORD32 i4_pair, i4_half;
+
+        /* chroma mode in the top two bits, mb type below it */
+        ps_i4x4_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
+        ps_i4x4_hdr->common.u1_cbp = ps_proc->u4_cbp;
+        ps_i4x4_hdr->common.u1_mb_qp = ps_proc->u1_mb_qp;
+        ps_i4x4_hdr->common.u1_residual_prediction_flag =
+            ps_proc->ps_mb_info->u1_residual_prediction_flag;
+        ps_i4x4_hdr->common.u1_base_mode_flag = ps_proc->ps_mb_info->u1_base_mode_flag;
+
+        /* the sixteen sub blocks are stored two per byte: the code of the */
+        /* even sub block is or-ed in as is, the code of the odd sub block */
+        /* is shifted up by four bits first                                */
+        for(i4_pair = 0; i4_pair < 8; i4_pair++)
+        {
+            WORD32 i4_packed = 0;
+
+            for(i4_half = 0; i4_half < 2; i4_half++)
+            {
+                WORD32 i4_blk = (i4_pair << 1) + i4_half;
+                WORD32 i4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[i4_blk];
+                WORD32 i4_pred_mode = ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4_blk];
+                WORD32 i4_code;
+
+                if(i4_pred_mode == i4_mode)
+                {
+                    /* the predicted mode is used, only bit 0 is set */
+                    i4_code = 1;
+                }
+                else if(i4_mode < i4_pred_mode)
+                {
+                    i4_code = i4_mode << 1;
+                }
+                else
+                {
+                    i4_code = (i4_mode - 1) << 1;
+                }
+
+                i4_packed |= i4_code << (i4_half << 2);
+            }
+
+            ps_i4x4_hdr->au1_sub_blk_modes[i4_pair] = i4_packed;
+        }
+
+        /* step over the record just written */
+        ps_proc->pv_mb_header_data = pu1_hdr + sizeof(isvce_mb_hdr_i4x4_t);
+    }
+    else if(I16x16 == u4_mb_type)
+    {
+        isvce_mb_hdr_i16x16_t *ps_i16x16_hdr =
+            (isvce_mb_hdr_i16x16_t *) ps_proc->pv_mb_header_data;
+
+        /* chroma mode, luma 16x16 mode and mb type share one byte */
+        ps_i16x16_hdr->common.u1_mb_type_mode =
+            (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
+        ps_i16x16_hdr->common.u1_cbp = ps_proc->u4_cbp;
+        ps_i16x16_hdr->common.u1_mb_qp = ps_proc->u1_mb_qp;
+        ps_i16x16_hdr->common.u1_residual_prediction_flag =
+            ps_proc->ps_mb_info->u1_residual_prediction_flag;
+        ps_i16x16_hdr->common.u1_base_mode_flag = ps_proc->ps_mb_info->u1_base_mode_flag;
+
+        ps_proc->pv_mb_header_data = pu1_hdr + sizeof(isvce_mb_hdr_i16x16_t);
+    }
+    else if(P16x16 == u4_mb_type)
+    {
+        isvce_mb_hdr_p16x16_t *ps_p16x16_hdr =
+            (isvce_mb_hdr_p16x16_t *) ps_proc->pv_mb_header_data;
+        UWORD32 u4_mvp_idx = ps_proc->ps_mb_info->as_pu->au1_mvp_idx[L0];
+        WORD32 i4_pred_mvx, i4_pred_mvy;
+
+        ps_p16x16_hdr->common.u1_mb_type_mode = u4_mb_type;
+        ps_p16x16_hdr->common.u1_cbp = ps_proc->u4_cbp;
+        ps_p16x16_hdr->common.u1_mb_qp = ps_proc->u1_mb_qp;
+        ps_p16x16_hdr->common.u1_residual_prediction_flag =
+            ps_proc->ps_mb_info->u1_residual_prediction_flag;
+        ps_p16x16_hdr->common.u1_base_mode_flag = ps_proc->ps_mb_info->u1_base_mode_flag;
+
+        ps_p16x16_hdr->u1_mvp_idx = u4_mvp_idx;
+
+        /* index 0 takes the predictor from ps_pred_mv, any other index */
+        /* takes it from ps_ilp_mv                                      */
+        if(0 != u4_mvp_idx)
+        {
+            i4_pred_mvx = ps_proc->ps_ilp_mv->as_mv[0][L0].s_mv.i2_mvx;
+            i4_pred_mvy = ps_proc->ps_ilp_mv->as_mv[0][L0].s_mv.i2_mvy;
+        }
+        else
+        {
+            i4_pred_mvx = ps_proc->ps_pred_mv[L0].s_mv.i2_mvx;
+            i4_pred_mvy = ps_proc->ps_pred_mv[L0].s_mv.i2_mvy;
+        }
+
+        /* mv difference = estimated mv - predictor */
+        ps_p16x16_hdr->ai2_mvd[0] =
+            ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx - i4_pred_mvx;
+        ps_p16x16_hdr->ai2_mvd[1] =
+            ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy - i4_pred_mvy;
+
+        ps_proc->pv_mb_header_data = pu1_hdr + sizeof(isvce_mb_hdr_p16x16_t);
+    }
+    else if(PSKIP == u4_mb_type)
+    {
+        isvce_mb_hdr_pskip_t *ps_pskip_hdr = (isvce_mb_hdr_pskip_t *) ps_proc->pv_mb_header_data;
+
+        /* a skip record carries neither cbp nor qp */
+        ps_pskip_hdr->common.u1_mb_type_mode = u4_mb_type;
+        ps_pskip_hdr->common.u1_residual_prediction_flag =
+            ps_proc->ps_mb_info->u1_residual_prediction_flag;
+        ps_pskip_hdr->common.u1_base_mode_flag = ps_proc->ps_mb_info->u1_base_mode_flag;
+
+        ps_proc->pv_mb_header_data = pu1_hdr + sizeof(isvce_mb_hdr_pskip_t);
+    }
+    else if(B16x16 == u4_mb_type)
+    {
+        isvce_mb_hdr_b16x16_t *ps_b16x16_hdr =
+            (isvce_mb_hdr_b16x16_t *) ps_proc->pv_mb_header_data;
+        UWORD32 u4_pred_mode = ps_proc->ps_mb_info->as_pu->u1_pred_mode;
+        WORD32 i4_dir;
+
+        /* pred mode above the mb type */
+        ps_b16x16_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
+        ps_b16x16_hdr->common.u1_cbp = ps_proc->u4_cbp;
+        ps_b16x16_hdr->common.u1_mb_qp = ps_proc->u1_mb_qp;
+        ps_b16x16_hdr->common.u1_residual_prediction_flag =
+            ps_proc->ps_mb_info->u1_residual_prediction_flag;
+        ps_b16x16_hdr->common.u1_base_mode_flag = ps_proc->ps_mb_info->u1_base_mode_flag;
+
+        for(i4_dir = 0; i4_dir < NUM_PRED_DIRS; i4_dir++)
+        {
+            PRED_MODE_T e_pred_mode = (PRED_MODE_T) i4_dir;
+            PRED_MODE_T e_cmpl_pred_mode = (e_pred_mode == L0) ? L1 : L0;
+            WORD32 i4_pred_mvx, i4_pred_mvy;
+
+            /* nothing is written for the complement of the direction that */
+            /* equals the pred mode of the pu                              */
+            if(u4_pred_mode == e_pred_mode)
+            {
+                continue;
+            }
+
+            ps_b16x16_hdr->au1_mvp_idx[e_cmpl_pred_mode] =
+                ps_proc->ps_mb_info->as_pu->au1_mvp_idx[e_cmpl_pred_mode];
+
+            /* index 0 takes the predictor from ps_pred_mv, any other index */
+            /* takes it from ps_ilp_mv                                      */
+            if(0 != ps_proc->ps_mb_info->as_pu->au1_mvp_idx[e_cmpl_pred_mode])
+            {
+                i4_pred_mvx = ps_proc->ps_ilp_mv->as_mv[0][e_cmpl_pred_mode].s_mv.i2_mvx;
+                i4_pred_mvy = ps_proc->ps_ilp_mv->as_mv[0][e_cmpl_pred_mode].s_mv.i2_mvy;
+            }
+            else
+            {
+                i4_pred_mvx = ps_proc->ps_pred_mv[e_cmpl_pred_mode].s_mv.i2_mvx;
+                i4_pred_mvy = ps_proc->ps_pred_mv[e_cmpl_pred_mode].s_mv.i2_mvy;
+            }
+
+            ps_b16x16_hdr->ai2_mvd[e_cmpl_pred_mode][0] =
+                ps_proc->ps_mb_info->as_pu->as_me_info[e_cmpl_pred_mode].s_mv.i2_mvx -
+                i4_pred_mvx;
+            ps_b16x16_hdr->ai2_mvd[e_cmpl_pred_mode][1] =
+                ps_proc->ps_mb_info->as_pu->as_me_info[e_cmpl_pred_mode].s_mv.i2_mvy -
+                i4_pred_mvy;
+        }
+
+        ps_proc->pv_mb_header_data = pu1_hdr + sizeof(isvce_mb_hdr_b16x16_t);
+    }
+    else if(BDIRECT == u4_mb_type)
+    {
+        isvce_mb_hdr_bdirect_t *ps_bdirect_hdr =
+            (isvce_mb_hdr_bdirect_t *) ps_proc->pv_mb_header_data;
+
+        ps_bdirect_hdr->common.u1_mb_type_mode = u4_mb_type;
+        ps_bdirect_hdr->common.u1_cbp = ps_proc->u4_cbp;
+        ps_bdirect_hdr->common.u1_mb_qp = ps_proc->u1_mb_qp;
+        ps_bdirect_hdr->common.u1_residual_prediction_flag =
+            ps_proc->ps_mb_info->u1_residual_prediction_flag;
+        ps_bdirect_hdr->common.u1_base_mode_flag = ps_proc->ps_mb_info->u1_base_mode_flag;
+
+        ps_proc->pv_mb_header_data = pu1_hdr + sizeof(isvce_mb_hdr_bdirect_t);
+    }
+    else if(BSKIP == u4_mb_type)
+    {
+        isvce_mb_hdr_bskip_t *ps_bskip_hdr = (isvce_mb_hdr_bskip_t *) ps_proc->pv_mb_header_data;
+        UWORD32 u4_pred_mode = ps_proc->ps_mb_info->as_pu->u1_pred_mode;
+
+        /* a skip record carries neither cbp nor qp */
+        ps_bskip_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
+        ps_bskip_hdr->common.u1_residual_prediction_flag =
+            ps_proc->ps_mb_info->u1_residual_prediction_flag;
+        ps_bskip_hdr->common.u1_base_mode_flag = ps_proc->ps_mb_info->u1_base_mode_flag;
+
+        ps_proc->pv_mb_header_data = pu1_hdr + sizeof(isvce_mb_hdr_bskip_t);
+    }
+    else if(BASE_MODE == u4_mb_type)
+    {
+        isvce_mb_hdr_base_mode_t *ps_base_mode_hdr =
+            (isvce_mb_hdr_base_mode_t *) ps_proc->pv_mb_header_data;
+
+        /* this mb type is expected only with the base mode flag set */
+        ASSERT(ps_proc->ps_mb_info->u1_base_mode_flag == 1);
+
+        ps_base_mode_hdr->common.u1_mb_type_mode = u4_mb_type;
+        ps_base_mode_hdr->common.u1_cbp = ps_proc->u4_cbp;
+        ps_base_mode_hdr->common.u1_mb_qp = ps_proc->u1_mb_qp;
+        ps_base_mode_hdr->common.u1_residual_prediction_flag =
+            ps_proc->ps_mb_info->u1_residual_prediction_flag;
+        ps_base_mode_hdr->common.u1_base_mode_flag = ps_proc->ps_mb_info->u1_base_mode_flag;
+
+        ps_proc->pv_mb_header_data = pu1_hdr + sizeof(isvce_mb_hdr_base_mode_t);
+    }
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief update process context after encoding an mb. This involves preserving
+* the current mb information for later use, initialize the proc ctxt elements to
+* encode next mb.
+*
+* @par Description:
+* This function performs house keeping tasks after encoding an mb.
+* After encoding an mb, various elements of the process context needs to be
+* updated to encode the next mb.
For instance, the source, recon and reference
+* pointers and the mb indices are moved to the next mb, and the slice index of
+* the next mb is loaded. The current mb info is also needed for mode prediction
+* and mv prediction of the mbs that follow, hence it is copied to the left and
+* top neighbour stores. The mb header is packed for entropy coding and, on the
+* last mb of a row, an entropy job for that row is queued.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns IH264_SUCCESS, or the error reported by ih264_list_queue when the
+* entropy job of the row could not be queued
+*
+* @remarks When queueing fails the function returns before the proc map, the
+* mb indices and the buffer pointers are updated.
+*
+*******************************************************************************
+*/
+WORD32 isvce_update_proc_ctxt(isvce_process_ctxt_t *ps_proc)
+{
+    /* error status */
+    WORD32 error_status = IH264_SUCCESS;
+
+    /* codec context */
+    isvce_codec_t *ps_codec = ps_proc->ps_codec;
+    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
+    mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
+
+    /* curr mb indices */
+    WORD32 i4_mb_x = ps_proc->i4_mb_x;
+    WORD32 i4_mb_y = ps_proc->i4_mb_y;
+
+    /* mb syntax elements of neighbors */
+    isvce_mb_info_t *ps_left_syn = ps_proc->s_nbr_info.ps_left_mb_info;
+    isvce_mb_info_t *ps_top_syn =
+        ps_proc->s_nbr_info_base.ps_layer_nbr_info[ps_proc->u1_spatial_layer_id]
+            .ps_top_row_mb_info +
+        i4_mb_x + i4_mb_y * ps_proc->i4_wd_mbs;
+
+    /* curr mb type */
+    UWORD32 u4_mb_type = ps_proc->ps_mb_info->u2_mb_type;
+
+    /* is the curr mb intra coded */
+    UWORD32 u4_is_intra = ps_proc->ps_mb_info->u1_is_intra;
+
+    /* width in mbs */
+    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+    /* height in mbs */
+    WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
+
+    /* proc map, row of the curr mb */
+    UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
+
+    /* deblk context */
+    isvce_deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+    /* deblk bs context */
+    isvce_bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
+
+    /* sub mb modes */
+    UWORD8 *pu1_top_mb_intra_modes =
+        (ps_proc->s_nbr_info_base.ps_layer_nbr_info[ps_proc->u1_spatial_layer_id]
+             .ps_top_mb_intra_modes +
+         i4_mb_x + i4_mb_y * ps_proc->i4_wd_mbs)
+            ->au1_intra_modes;
+
+    /*************************************************************/
+    /* During MV prediction, when top right mb is not available, */
+    /* top left mb info. is used for prediction. Hence the curr  */
+    /* top, which will be top left for the next mb needs to be   */
+    /* preserved before updating it with curr mb info.           */
+    /*************************************************************/
+
+    /*************************************************/
+    /* update top and left with curr mb info results */
+    /*************************************************/
+    ps_left_syn[0] = ps_top_syn[0] = ps_proc->ps_mb_info[0];
+    ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
+    ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
+
+    if(u4_is_intra)
+    {
+        /* mb / sub mb modes */
+        if(I16x16 == u4_mb_type)
+        {
+            pu1_top_mb_intra_modes[0] =
+                ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[0] =
+                    ps_proc->u1_l_i16_mode;
+        }
+        else if(I4x4 == u4_mb_type)
+        {
+            ps_mem_fxns->pf_mem_cpy_mul8(
+                ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes,
+                ps_proc->au1_intra_luma_mb_4x4_modes, 16);
+            ps_mem_fxns->pf_mem_cpy_mul8(pu1_top_mb_intra_modes,
+                                         ps_proc->au1_intra_luma_mb_4x4_modes, 16);
+        }
+        else if(I8x8 == u4_mb_type)
+        {
+            memcpy(ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes,
+                   ps_proc->au1_intra_luma_mb_8x8_modes, 4);
+            memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
+        }
+
+        *ps_proc->pu4_mb_pu_cnt = 1;
+    }
+
+    /*
+     * Mark that the MB has been coded intra
+     * So that future AIRs can skip it
+     */
+    ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
+
+    /**************************************************/
+    /* pack mb header info. for entropy coding        */
+    /**************************************************/
+    isvce_pack_header_data(ps_proc);
+
+    /*
+     * We need to sync the cache to make sure that the nmv content of proc
+     * is updated to cache properly
+     */
+    DATA_SYNC();
+
+    /* Just before finishing the row, enqueue the job in to entropy queue.
+     * The master thread depending on its convenience shall dequeue it and
+     * performs entropy.
+     *
+     * WARN !! Placing this block post proc map update can cause queuing of
+     * entropy jobs in out of order.
+     */
+    if(i4_mb_x == i4_wd_mbs - 1)
+    {
+        /* job structures */
+        job_t s_job;
+
+        /* job class */
+        s_job.i4_cmd = CMD_ENTROPY;
+
+        /* number of mbs to be processed in the current job */
+        s_job.i2_mb_cnt = ps_proc->i4_wd_mbs;
+
+        /* job start index x */
+        s_job.i2_mb_x = 0;
+
+        /* job start index y */
+        s_job.i2_mb_y = ps_proc->i4_mb_y;
+
+        /* queue the job */
+        error_status = ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
+
+        if(error_status != IH264_SUCCESS)
+        {
+            return error_status;
+        }
+
+        /* last row of the picture: no more entropy jobs will follow */
+        /* NOTE(review): the job is queued on ps_proc->pv_entropy_jobq but the */
+        /* queue terminated here is ps_codec->pv_entropy_jobq; presumably both */
+        /* refer to the same queue - confirm                                   */
+        if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
+        {
+            ih264_list_terminate(ps_codec->pv_entropy_jobq);
+        }
+    }
+
+    /* update proc map */
+    pu1_proc_map[i4_mb_x] = 1;
+    ASSERT(i4_mb_x < i4_wd_mbs);
+
+    /**************************************************/
+    /* update proc ctxt elements for encoding next mb */
+    /**************************************************/
+    /* update indices */
+    i4_mb_x++;
+    ps_proc->i4_mb_x = i4_mb_x;
+
+    if(ps_proc->i4_mb_x == i4_wd_mbs)
+    {
+        ps_proc->i4_mb_y++;
+        ps_proc->i4_mb_x = 0;
+    }
+
+    /* update slice index */
+    /* NOTE(review): after the last mb of the picture the index used here is */
+    /* i4_ht_mbs * i4_wd_mbs, i.e. one entry past the last mb; confirm that  */
+    /* pu1_slice_idx has room for that entry                                 */
+    ps_proc->i4_cur_slice_idx =
+        ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
+
+    /* update buffers pointers */
+    ps_proc->s_src_buf_props.as_component_bufs[0].pv_data =
+        ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + MB_SIZE;
+    ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data =
+        ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + MB_SIZE;
+    ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data =
+        ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data) + MB_SIZE;
+    ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data =
+        ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data) + MB_SIZE;
+
+    /*
+     * Note: Although chroma mb size is 8, as the chroma buffers are
+     * interleaved, the stride per MB is MB_SIZE
+     */
+    ps_proc->s_src_buf_props.as_component_bufs[1].pv_data =
+        ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data) + MB_SIZE;
+    ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data =
+        ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data) + MB_SIZE;
+    ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data =
+        ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data) + MB_SIZE;
+    ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data =
+        ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data) + MB_SIZE;
+
+    /* Reset cost, distortion params */
+    ps_proc->i4_mb_cost = INT_MAX;
+    ps_proc->i4_mb_distortion = SHRT_MAX;
+
+    ps_proc->ps_mb_info++;
+    ps_proc->pu4_mb_pu_cnt++;
+
+    /* Update colocated pu */
+    if(ps_proc->i4_slice_type == BSLICE)
+    {
+        ps_proc->ps_col_mb++;
+    }
+
+    if(ps_proc->u4_disable_deblock_level != 1)
+    {
+        ps_bs->i4_mb_x = ps_proc->i4_mb_x;
+        ps_bs->i4_mb_y = ps_proc->i4_mb_y;
+
+        /* For N MB processing this update takes place inside the deblocking */
+        /* function, hence the block below is compiled only without it       */
+#ifndef N_MB_ENABLE
+        ASSERT(0);
+        ps_deblk->i4_mb_x++;
+
+        /* a cast does not yield an lvalue, so the advanced pointer is */
+        /* assigned back to the member                                 */
+        ps_deblk->s_rec_pic_buf_props.as_component_bufs[0].pv_data =
+            ((UWORD8 *) ps_deblk->s_rec_pic_buf_props.as_component_bufs[0].pv_data) + MB_SIZE;
+        /*
+         * Note: Although chroma mb size is 8, as the chroma buffers are
+         * interleaved, the stride per MB is MB_SIZE
+         */
+        ps_deblk->s_rec_pic_buf_props.as_component_bufs[1].pv_data =
+            ((UWORD8 *) ps_deblk->s_rec_pic_buf_props.as_component_bufs[1].pv_data) + MB_SIZE;
+#endif
+    }
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma padding
+*
+* 
@par Description: +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @param[in] pu1_curr_pic_luma +* Pointer to luma buffer +* +* @param[in] pu1_curr_pic_chroma +* Pointer to chroma buffer +* +* @param[in] i4_mb_x +* mb index x +* +* @param[in] i4_mb_y +* mb index y +* +* @param[in] i4_pad_ht +* number of rows to be padded +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_pad_recon_buffer(isvce_process_ctxt_t *ps_proc, UWORD8 *pu1_curr_pic_luma, + WORD32 i4_luma_stride, UWORD8 *pu1_curr_pic_chroma, + WORD32 i4_chroma_stride, WORD32 i4_mb_x, WORD32 i4_mb_y, + WORD32 i4_pad_ht) +{ + /* codec context */ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + if(i4_mb_x == 0) + { + /* padding left luma */ + ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_luma_stride, i4_pad_ht, PAD_LEFT); + + /* padding left chroma */ + ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_chroma_stride, i4_pad_ht >> 1, + PAD_LEFT); + } + if(i4_mb_x == ps_proc->i4_wd_mbs - 1) + { + /* padding right luma */ + ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_luma_stride, i4_pad_ht, + PAD_RIGHT); + + /* padding right chroma */ + ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_chroma_stride, + i4_pad_ht >> 1, PAD_RIGHT); + + if(i4_mb_y == ps_proc->i4_ht_mbs - 1) + { + UWORD8 *pu1_rec_luma = + pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_luma_stride); + UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + + (((i4_pad_ht >> 1) - 1) * i4_chroma_stride); + + /* padding bottom luma */ + ps_codec->pf_pad_bottom(pu1_rec_luma, i4_luma_stride, i4_luma_stride, PAD_BOT); + + /* padding bottom chroma */ + ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_chroma_stride, i4_chroma_stride, + (PAD_BOT >> 1)); + } + } + + if(i4_mb_y == 0) + { + UWORD8 *pu1_rec_luma = pu1_curr_pic_luma; + UWORD8 *pu1_rec_chroma = 
pu1_curr_pic_chroma; + WORD32 wd = MB_SIZE; + + if(i4_mb_x == 0) + { + pu1_rec_luma -= PAD_LEFT; + pu1_rec_chroma -= PAD_LEFT; + + wd += PAD_LEFT; + } + if(i4_mb_x == ps_proc->i4_wd_mbs - 1) + { + wd += PAD_RIGHT; + } + + /* padding top luma */ + ps_codec->pf_pad_top(pu1_rec_luma, i4_luma_stride, wd, PAD_TOP); + + /* padding top chroma */ + ps_codec->pf_pad_top(pu1_rec_chroma, i4_chroma_stride, wd, (PAD_TOP >> 1)); + } + + return IH264E_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief This function performs deblocking +* +* @par Description: +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static IH264E_ERROR_T isvce_dblk_n_mbs(isvce_process_ctxt_t *ps_proc, + UWORD8 u1_inter_layer_deblk_flag) +{ + WORD32 i; + WORD32 row, col; + + n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; + isvce_deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; + + UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs; + UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs; + WORD32 u4_deblk_prev_row = 0; + WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs; + WORD32 i4_n_mb_process_count = 0; + WORD32 i4_mb_x = ps_proc->i4_mb_x; + WORD32 i4_mb_y = ps_proc->i4_mb_y; + + ASSERT(i4_n_mbs == ps_proc->i4_wd_mbs); + + if(ps_proc->u4_disable_deblock_level != 1) + { + if((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1))) + { + /* if number of mb's to be processed are less than 'N', go back. 
+ * exception to the above clause is end of row */ + if(((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && + (i4_mb_x < (ps_proc->i4_wd_mbs - 1))) + { + return IH264E_SUCCESS; + } + else + { + WORD32 i4_num_deblk_rows = 1; + + if(i4_mb_y == (ps_proc->i4_ht_mbs - 1)) + { + i4_num_deblk_rows += (ps_proc->i4_ht_mbs > 1); + } + + if(1 == ps_proc->i4_ht_mbs) + { + ps_deblk->i4_mb_y = 0; + pu1_deblk_map_prev_row = pu1_deblk_map; + } + + for(i = 0; i < i4_num_deblk_rows; i++) + { + if(i == 1) + { + /* Deblock last row */ + ps_n_mb_ctxt->i4_mb_x = 0; + ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y; + ps_deblk->i4_mb_x = 0; + ps_deblk->i4_mb_y = ps_proc->i4_mb_y; + pu1_deblk_map_prev_row = pu1_deblk_map; + pu1_deblk_map += ps_proc->i4_wd_mbs; + } + + i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs); + + /* performing deblocking for required number of MBs */ + u4_deblk_prev_row = 1; + + /* checking whether the top rows are deblocked */ + for(col = 0; col < i4_n_mb_process_count; col++) + { + u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col]; + } + + /* checking whether the top right MB is deblocked */ + if((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs) + { + u4_deblk_prev_row &= + pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count]; + } + + /* Top or Top right MBs not deblocked */ + if((u4_deblk_prev_row != 1) && (i4_mb_y > 0)) + { + return IH264E_SUCCESS; + } + + for(row = 0; row < i4_n_mb_process_count; row++) + { + isvce_deblock_mb(ps_proc, ps_deblk, u1_inter_layer_deblk_flag); + + pu1_deblk_map[ps_deblk->i4_mb_x] = 1; + + ps_deblk->i4_mb_x++; + } + } + } + } + } + + return IH264E_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief This function performs 'intra base' deblocking +* +* @par Description: +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns error status +* +* @remarks none +* 
+*******************************************************************************
+*/
+static IH264E_ERROR_T isvce_intra_base_dblk(isvce_process_ctxt_t *ps_proc)
+{
+    isvce_codec_t *ps_codec = ps_proc->ps_codec;
+    isvce_deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+    IH264E_ERROR_T e_status;
+
+    /* nothing to do for the top-most spatial layer */
+    if(ps_proc->u1_spatial_layer_id >= (ps_proc->s_svc_params.u1_num_spatial_layers - 1))
+    {
+        return IH264E_SUCCESS;
+    }
+
+    /* restart the deblock context at the current MB column, one row above */
+    ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
+    ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
+
+    /* deblock this layer's intra recon buffer instead of the regular recon */
+    ps_deblk->s_rec_pic_buf_props =
+        ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id];
+
+    e_status = isvce_dblk_n_mbs(ps_proc, 1);
+
+    /* point the deblock context back at the regular recon buffer */
+    ps_deblk->s_rec_pic_buf_props = ps_proc->s_rec_pic_buf_props;
+
+    return e_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma core coding for a set of mb's.
+*
+* @par Description:
+*  The mb to be coded is taken and is evaluated over a predefined set of modes
+*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least
+*  cost is selected and using intra/inter prediction filters, prediction is
+*  carried out. The deviation between src and pred signal constitutes error
+*  signal. This error signal is transformed (hierarchical transform if necessary)
+*  and quantized. The quantized residue is packed in to entropy buffer for entropy
+*  coding. This is repeated for all the mb's enlisted under the job.
+* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +WORD32 isvce_process(isvce_process_ctxt_t *ps_proc) +{ + UWORD32 u4_cbp_l, u4_cbp_c; + WORD32 i4_mb_idx; + WORD32 luma_idx, chroma_idx, is_intra; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns; + + WORD32 error_status = IH264_SUCCESS; + WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; + WORD32 i4_mb_cnt = ps_proc->i4_mb_cnt; + UWORD32 u4_valid_modes = 0; + WORD32 i4_gate_threshold = 0; + WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS; + bool b_enable_intra4x4_eval = true; + + /* + * list of modes for evaluation + * ------------------------------------------------------------------------- + * Note on enabling I4x4 and I16x16 + * At very low QP's the hadamard transform in I16x16 will push up the maximum + * coeff value very high. CAVLC may not be able to represent the value and + * hence the stream may not be decodable in some clips. + * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of + * preset. + */ + if(ps_proc->i4_slice_type == ISLICE) + { + u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; + + /* enable intra 8x8 */ + u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0; + + /* enable intra 4x4 */ + u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; + u4_valid_modes |= (ps_proc->u1_frame_qp <= 10) << I4x4; + } + else if(ps_proc->i4_slice_type == PSLICE) + { + u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; + + /* enable intra 4x4 */ + if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) + { + u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? 
(1 << I4x4) : 0; + } + u4_valid_modes |= (ps_proc->u1_frame_qp <= 10) << I4x4; + + /* enable inter P16x16 */ + u4_valid_modes |= (1 << P16x16); + } + else if(ps_proc->i4_slice_type == BSLICE) + { + u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; + + /* enable intra 4x4 */ + if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) + { + u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; + } + u4_valid_modes |= (ps_proc->u1_frame_qp <= 10) << I4x4; + + /* enable inter B16x16 */ + u4_valid_modes |= (1 << B16x16); + } + + ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x; + ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y; + ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x); + + /* compute recon when : + * 1. current frame is to be used as a reference + * 2. dump recon for bit stream sanity check + */ + ps_proc->u4_compute_recon = ((ps_proc->s_svc_params.u1_num_spatial_layers > 1) && + (ENABLE_RESIDUAL_PREDICTION || ENABLE_IBL_MODE)) || + ps_codec->u4_is_curr_frm_ref || ps_codec->s_cfg.u4_enable_recon; + + for(i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx++) + { + /* since we have not yet found sad, we have not yet got min sad */ + /* we need to initialize these variables for each MB */ + /* TODO how to get the min sad into the codec */ + ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad; + ps_proc->u4_min_sad_reached = 0; + + ps_proc->ps_mb_info->u1_mb_qp = ps_proc->u1_mb_qp; + + /* wait until the proc of [top + 1] mb is computed. 
+ * We wait till the proc dependencies are satisfied */ + if(ps_proc->i4_mb_y > 0) + { + UWORD8 *pu1_proc_map_top; + + pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs); + + while(1) + { + volatile UWORD8 *pu1_buf; + WORD32 idx = MIN(i4_mb_cnt - 1, i4_mb_idx + 1); + + idx = MIN(idx, ((WORD32) ps_codec->s_cfg.i4_wd_mbs - 1)); + pu1_buf = pu1_proc_map_top + idx; + if(*pu1_buf) break; + ithread_yield(); + } + } + + if(ENABLE_ILP_MV && (ps_proc->u1_spatial_layer_id > 0) && + (ps_proc->i4_slice_type != ISLICE)) + { + svc_ilp_mv_ctxt_t *ps_svc_ilp_mv_ctxt = ps_proc->ps_svc_ilp_mv_ctxt; + coordinates_t s_mb_pos = {ps_proc->i4_mb_x, ps_proc->i4_mb_y}; + + ps_svc_ilp_mv_ctxt->s_ilp_mv_variables.ps_svc_ilp_data = &ps_codec->s_svc_ilp_data; + ps_svc_ilp_mv_ctxt->s_ilp_mv_variables.s_mb_pos = s_mb_pos; + ps_svc_ilp_mv_ctxt->s_ilp_mv_variables.u1_spatial_layer_id = + ps_proc->u1_spatial_layer_id; + + isvce_get_mb_ilp_mv(ps_svc_ilp_mv_ctxt); + + ps_proc->ps_ilp_mv = &ps_svc_ilp_mv_ctxt->s_ilp_mv_outputs.s_ilp_mv; + ps_proc->s_me_ctxt.ps_ilp_me_cands = + &ps_svc_ilp_mv_ctxt->s_ilp_mv_outputs.s_ilp_me_cands; + } + else + { + ps_proc->ps_ilp_mv = NULL; + ps_proc->s_me_ctxt.ps_ilp_me_cands = NULL; + } + + ps_proc->ps_mb_info->u2_mb_type = INVALID_MB_TYPE; + ps_proc->i4_mb_distortion = SHRT_MAX; + + { + WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs; + + WORD32 i4_air_enable_inter = + (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) || + (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt); + + if((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16))) + { + if(ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0) + { + isvce_compute_me_nmb( + ps_proc, MIN((WORD32) ps_proc->u4_nmb_me, i4_wd_mbs - ps_proc->i4_mb_x)); + } + + { + UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me; + + ps_proc->u4_min_sad_reached = + ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; + ps_proc->u4_min_sad = 
ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; + + ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]); + ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl); + ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]); + + ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion; + + ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost; + ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; + ps_proc->u4_min_sad_reached = + ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; + ps_proc->ps_mb_info->u2_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type; + + ps_proc->pu1_best_subpel_buf = + ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf; + ps_proc->u4_bst_spel_buf_strd = + ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd; + } + + isvce_derive_nghbr_avbl_of_mbs(ps_proc); + } + else + { + ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl; + + isvce_derive_nghbr_avbl_of_mbs(ps_proc); + } + + /* + * If air says intra, we need to force the following code path to evaluate + * intra The easy way is just to say that the inter cost is too much + */ + if(!i4_air_enable_inter) + { + ps_proc->u4_min_sad_reached = 0; + ps_proc->i4_mb_cost = INT_MAX; + ps_proc->i4_mb_distortion = INT_MAX; + } + else if(ps_proc->ps_mb_info->u2_mb_type == PSKIP) + { + ps_proc->ps_mb_info->u1_base_mode_flag = 0; + ps_proc->ps_mb_info->u1_residual_prediction_flag = 0; + goto UPDATE_MB_INFO; + } + + /* If we already have the minimum sad, there is no point in searching for + * sad again */ + if((ps_proc->u4_min_sad_reached == 0) || + (ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST)) + { + /* intra gating in inter slices */ + /* No need of gating if we want to force intra, we need to find the + * threshold only if inter is enabled by AIR*/ + if((ps_proc->i4_slice_type != ISLICE) && + (FORCE_DISTORTION_BASED_INTRA_4X4_GATING || + (i4_air_enable_inter && ps_codec->u4_inter_gate))) 
+ { + WORD32 i4_distortion[4]; + + if((ps_proc->i4_mb_x > 0) && (ps_proc->i4_mb_y > 0)) + { + i4_distortion[0] = ps_proc->s_nbr_info.ps_left_mb_info->i4_mb_distortion; + + i4_distortion[1] = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x] + .i4_mb_distortion; + + i4_distortion[2] = + ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x + 1] + .i4_mb_distortion; + + i4_distortion[3] = + ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x - 1] + .i4_mb_distortion; + + i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + + i4_distortion[2] + i4_distortion[3]) / + 4; + } + } + + b_enable_intra4x4_eval = true; + + if(ENABLE_IBL_MODE && (ps_proc->u1_spatial_layer_id > 0) && + (ps_proc->s_svc_params.d_spatial_res_ratio == 2.) && !ps_proc->ps_ilp_mv) + { + isvce_evaluate_IBL_mode(ps_proc); + } + else + { + ps_proc->ps_mb_info->u1_base_mode_flag = 0; + } + + if(u4_valid_modes & (1 << I16x16)) + { + isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc); + + if(ENABLE_INTRA16X16_BASED_INTRA4X4_GATING && + (ps_proc->i4_slice_type != ISLICE) && + (ps_proc->ps_mb_info->u2_mb_type == I16x16)) + { + b_enable_intra4x4_eval = false; + } + } + + if(u4_valid_modes & (1 << I8x8)) + { + isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); + } + + if(ENABLE_ILP_BASED_INTRA4X4_GATING && (ps_proc->i4_slice_type != ISLICE)) + { + b_enable_intra4x4_eval = + !(ps_proc->ps_ilp_mv && (INVALID_MB_TYPE != ps_proc->ps_ilp_mv->e_mb_type)); + } + + /* If we are going to force intra we need to evaluate intra irrespective + * of gating */ + if((!i4_air_enable_inter) || + ((i4_gate_threshold + 16 * ((WORD32) ps_proc->u4_lambda)) < + ps_proc->i4_mb_distortion)) + { + if(b_enable_intra4x4_eval && (u4_valid_modes & (1 << I4x4))) + { + if(!FORCE_FAST_INTRA4X4 && + (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)) + { + isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc); + } + else + { + 
isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc); + } + } + } + } + } + + if(ps_proc->ps_mb_info->u2_mb_type == I4x4 || ps_proc->ps_mb_info->u2_mb_type == I16x16 || + ps_proc->ps_mb_info->u2_mb_type == I8x8) + { + luma_idx = ps_proc->ps_mb_info->u2_mb_type; + chroma_idx = 0; + is_intra = 1; + + isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); + } + else if(ps_proc->ps_mb_info->u2_mb_type == BASE_MODE) + { + luma_idx = 3; + chroma_idx = 1; + is_intra = 1; + ps_proc->u4_min_sad_reached = 0; + } + else + { + luma_idx = 3; + chroma_idx = 1; + is_intra = 0; + } + + ps_proc->ps_mb_info->u1_is_intra = is_intra; + + if(is_intra) + { + ps_proc->ps_mb_info->as_pu->as_me_info[L0].i1_ref_idx = -1; + ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx = 0; + ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy = 0; + + ps_proc->ps_mb_info->as_pu->as_me_info[L1].i1_ref_idx = -1; + ps_proc->ps_mb_info->as_pu->as_me_info[L1].s_mv.i2_mvx = 0; + ps_proc->ps_mb_info->as_pu->as_me_info[L1].s_mv.i2_mvy = 0; + } + else + { + isvce_mv_pred(ps_proc, ps_proc->i4_slice_type); + } + + if(ENABLE_RESIDUAL_PREDICTION && !is_intra && (ps_proc->u1_spatial_layer_id > 0) && + (ps_proc->i4_slice_type == PSLICE) && (ps_proc->ps_mb_info->u2_mb_type != PSKIP)) + { + svc_res_pred_ctxt_t *ps_res_pred_ctxt = ps_proc->ps_res_pred_ctxt; + + UWORD32 u4_res_pred_sad; + + isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt; + yuv_buf_props_t s_pred = ps_proc->s_src_buf_props; + + if(!(ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx % 4) && + !(ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy % 4)) + { + s_pred.as_component_bufs[Y].pv_data = + ps_me_ctxt->apu1_ref_buf_luma[L0] + + (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx >> 2) + + (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy >> 2) * + ps_me_ctxt->ai4_rec_strd[L0]; + s_pred.as_component_bufs[Y].i4_data_stride = ps_me_ctxt->ai4_rec_strd[L0]; + } + else + { + s_pred.as_component_bufs[Y].pv_data = 
ps_proc->pu1_best_subpel_buf; + s_pred.as_component_bufs[Y].i4_data_stride = ps_proc->u4_bst_spel_buf_strd; + } + + s_pred.as_component_bufs[U].pv_data = s_pred.as_component_bufs[V].pv_data = NULL; + + ps_res_pred_ctxt->s_res_pred_variables.ps_svc_ilp_data = &ps_codec->s_svc_ilp_data; + ps_res_pred_ctxt->s_res_pred_variables.s_mb_pos.i4_abscissa = ps_proc->i4_mb_x; + ps_res_pred_ctxt->s_res_pred_variables.s_mb_pos.i4_ordinate = ps_proc->i4_mb_y; + ps_res_pred_ctxt->s_res_pred_variables.u1_spatial_layer_id = + ps_proc->u1_spatial_layer_id; + + if(ps_proc->s_svc_params.d_spatial_res_ratio == 2.) + { + isvce_get_mb_residual_pred(ps_proc->ps_res_pred_ctxt); + } + else + { + isvce_get_mb_residual_pred_non_dyadic(ps_proc->ps_res_pred_ctxt); + } + + isvce_residual_pred_eval(ps_proc->ps_res_pred_ctxt, &ps_proc->s_src_buf_props, &s_pred, + ps_proc->ps_mb_res_buf, &u4_res_pred_sad, + &ps_proc->ps_mb_info->u1_residual_prediction_flag, + ps_proc->i4_mb_distortion); + + if(ps_proc->ps_mb_info->u1_residual_prediction_flag) + { + ps_proc->i4_mb_cost -= ps_proc->i4_mb_distortion; + ps_proc->i4_mb_cost += (WORD32) u4_res_pred_sad; + ps_proc->i4_mb_distortion = (WORD32) u4_res_pred_sad; + } + } + else + { + ps_proc->ps_mb_info->u1_residual_prediction_flag = 0; + } + + if(isvce_is_ilp_mv_winning_mv(ps_proc->ps_mb_info, ps_proc->ps_ilp_mv)) + { + ps_proc->ps_mb_info->as_pu->as_me_info[L0] = ps_proc->ps_ilp_mv->as_mv[0][L0]; + ps_proc->ps_mb_info->as_pu->as_me_info[L1] = ps_proc->ps_ilp_mv->as_mv[0][L1]; + + ps_proc->ps_mb_info->u1_base_mode_flag = 1; + ps_proc->ps_mb_info->u2_mb_type = BASE_MODE; + } + else if(ps_proc->ps_mb_info->u2_mb_type != BASE_MODE) + { + ps_proc->ps_mb_info->u1_base_mode_flag = 0; + } + + isvce_mvp_idx_eval(ps_proc->ps_mb_info, ps_proc->ps_pred_mv, ps_proc->ps_ilp_mv->as_mv[0], + ps_proc->s_me_ctxt.pu1_mv_bits); + + /* 8x8 Tx is not supported, and I8x8 is also unsupported */ + ASSERT((luma_idx == 0) || (luma_idx == 1) || (luma_idx == 3)); + 
ps_proc->ps_mb_info->u1_tx_size = 4; + + /* Perform luma mb core coding */ + u4_cbp_l = (ps_enc_loop_fxns->apf_luma_energy_compaction)[luma_idx](ps_proc); + + /* Perform chroma mb core coding */ + u4_cbp_c = (ps_enc_loop_fxns->apf_chroma_energy_compaction)[chroma_idx](ps_proc); + + ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l; + ps_proc->ps_mb_info->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l; + ps_proc->ps_mb_info->u4_csbp = isvce_calculate_csbp(ps_proc); + + if(ps_proc->ps_mb_info->u1_is_intra) + { + switch(ps_proc->ps_mb_info->u2_mb_type) + { + case I16x16: + { + ps_proc->ps_mb_info->s_intra_pu.s_i16x16_mode_data.u1_mode = + ps_proc->u1_l_i16_mode; + + break; + } + case I4x4: + { + WORD32 i; + + for(i = 0; i < MAX_TU_IN_MB; i++) + { + ps_proc->ps_mb_info->s_intra_pu.as_i4x4_mode_data[i].u1_mode = + ps_proc->au1_intra_luma_mb_4x4_modes[i]; + ps_proc->ps_mb_info->s_intra_pu.as_i4x4_mode_data[i].u1_predicted_mode = + ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i]; + } + + break; + } + case BASE_MODE: + { + break; + } + default: + { + ASSERT(false); + } + } + + ps_proc->ps_mb_info->s_intra_pu.u1_chroma_intra_mode = ps_proc->u1_c_i8_mode; + } + + if(!ps_proc->ps_mb_info->u1_is_intra && !ps_proc->ps_mb_info->u1_residual_prediction_flag) + { + if(ps_proc->i4_slice_type == BSLICE) + { + if(isvce_find_bskip_params(ps_proc, L0)) + { + ps_proc->ps_mb_info->u2_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP; + } + } + else if(!ps_proc->u4_cbp) + { + if(isvce_find_pskip_params(ps_proc, L0)) + { + ps_proc->ps_mb_info->u2_mb_type = PSKIP; + } + } + } + + UPDATE_MB_INFO: + isvce_svc_ilp_buf_update(ps_proc); + + isvce_update_ibl_info( + ps_proc->ps_intra_pred_ctxt, ps_proc->s_svc_params.u1_num_spatial_layers, + ps_proc->u1_spatial_layer_id, ps_proc->ps_mb_info->u2_mb_type, ps_proc->i4_mb_x, + ps_proc->i4_mb_y, ps_proc->ps_mb_info->u1_base_mode_flag); + + isvce_update_res_pred_info(ps_proc); + + /* Update mb sad, mb qp and intra mb cost. 
Will be used by rate control */ + isvce_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc); + + { + svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt = ps_proc->ps_sub_pic_rc_ctxt; + svc_sub_pic_rc_mb_variables_t *ps_sub_pic_rc_variables = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_variables.s_mb_variables; + + ps_sub_pic_rc_variables->ps_mb_info = ps_proc->ps_mb_info; + ps_sub_pic_rc_variables->s_mb_pos.i4_abscissa = ps_proc->i4_mb_x; + ps_sub_pic_rc_variables->s_mb_pos.i4_ordinate = ps_proc->i4_mb_y; + ps_sub_pic_rc_variables->u4_cbp = ps_proc->u4_cbp; + ps_sub_pic_rc_variables->aps_mvps[0] = ps_proc->ps_pred_mv; +#if MAX_MVP_IDX == 1 + ps_sub_pic_rc_variables->aps_mvps[1] = ps_proc->ps_ilp_mv->as_mv[0]; +#endif + ps_sub_pic_rc_variables->apu1_nnzs[Y] = (UWORD8 *) ps_proc->au4_nnz; + ps_sub_pic_rc_variables->apu1_nnzs[UV] = ps_proc->au1_chroma_nnz; + + /* Quant coeffs are arranged TU by TU */ + switch(ps_proc->ps_mb_info->u2_mb_type) + { + case I16x16: + case I4x4: + case P16x16: + case B16x16: + case BASE_MODE: + { + ps_sub_pic_rc_variables->as_quant_coeffs[Y].pv_data = + ps_proc->pi2_res_buf_intra_4x4; + ps_sub_pic_rc_variables->as_quant_coeffs[Y].i4_data_stride = + ps_proc->i4_res_strd; + ps_sub_pic_rc_variables->as_quant_coeffs[UV].pv_data = ps_proc->pi2_res_buf; + ps_sub_pic_rc_variables->as_quant_coeffs[UV].i4_data_stride = + ps_proc->i4_res_strd; + + break; + } + case PSKIP: + case BSKIP: + { + ps_sub_pic_rc_variables->as_quant_coeffs[Y].pv_data = NULL; + ps_sub_pic_rc_variables->as_quant_coeffs[UV].pv_data = NULL; + + break; + } + default: + { + ASSERT(false); + + break; + } + } + + isvce_sub_pic_rc_ctxt_update(ps_proc->ps_sub_pic_rc_ctxt); + } + +#if ENABLE_MODE_STAT_VISUALISER + if(ps_proc->u1_spatial_layer_id == (ps_proc->s_svc_params.u1_num_spatial_layers - 1)) + { + coordinates_t s_mb_pos = {ps_proc->i4_mb_x, ps_proc->i4_mb_y}; + + isvce_msv_set_mode(ps_codec->ps_mode_stat_visualiser, ps_proc->ps_mb_info, &s_mb_pos); + } +#endif + + 
/**********************************************************************/ + /* if disable deblock level is '0' this implies enable deblocking for */ + /* all edges of all macroblocks with out any restrictions */ + /* */ + /* if disable deblock level is '1' this implies disable deblocking for*/ + /* all edges of all macroblocks with out any restrictions */ + /* */ + /* if disable deblock level is '2' this implies enable deblocking for */ + /* all edges of all macroblocks except edges overlapping with slice */ + /* boundaries. This option is not currently supported by the encoder */ + /* hence the slice map should be of no significance to perform debloc */ + /* king */ + /**********************************************************************/ + + if(ps_proc->u4_compute_recon) + { + /* compute blocking strength */ + if(ps_proc->u4_disable_deblock_level != 1) + { + isvce_compute_bs(ps_proc, 0); + + if(ENABLE_INTRA_BASE_DEBLOCK && (ps_proc->u1_spatial_layer_id < + (ps_proc->s_svc_params.u1_num_spatial_layers - 1))) + { + isvce_compute_bs(ps_proc, 1); + } + } + /* nmb deblocking and hpel and padding */ + isvce_dblk_n_mbs(ps_proc, 0); + + if(ENABLE_INTRA_BASE_DEBLOCK && + (ps_proc->u1_spatial_layer_id < (ps_proc->s_svc_params.u1_num_spatial_layers - 1))) + { + isvce_intra_base_dblk(ps_proc); + } + + if(ps_proc->i4_mb_x == (ps_proc->i4_wd_mbs - 1) && + ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) + { + isvce_svc_pad_frame(ps_proc); + + isvce_pad_mb_mode_buf(ps_proc->ps_intra_pred_ctxt, ps_proc->u1_spatial_layer_id, + ps_proc->s_svc_params.u1_num_spatial_layers, + ps_proc->s_svc_params.d_spatial_res_ratio, + ps_codec->s_cfg.u4_wd, ps_codec->s_cfg.u4_ht); + } + } + + /* update the context after for coding next mb */ + error_status = isvce_update_proc_ctxt(ps_proc); + + if(error_status != IH264E_SUCCESS) + { + return error_status; + } + + { + UWORD8 u1_new_mb_qp; + + u1_new_mb_qp = + isvce_sub_pic_rc_get_mb_qp(ps_proc->ps_sub_pic_rc_ctxt, ps_proc->u1_mb_qp); + + 
if(u1_new_mb_qp != ps_proc->u1_mb_qp) + { + ps_proc->u1_mb_qp = u1_new_mb_qp; + ps_proc->u4_lambda = gu1_qp0[u1_new_mb_qp]; + + isvce_init_quant_params(ps_proc, ps_proc->u1_mb_qp); + } + } + + /* Once the last row is processed, mark the buffer status appropriately */ + if(ps_proc->i4_ht_mbs == ps_proc->i4_mb_y) + { + /* Pointer to current picture buffer structure */ + svc_au_buf_t *ps_cur_pic = ps_proc->ps_cur_pic; + + /* Pointer to current picture's mv buffer structure */ + svc_au_data_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; + + /**********************************************************************/ + /* if disable deblock level is '0' this implies enable deblocking for */ + /* all edges of all macroblocks with out any restrictions */ + /* */ + /* if disable deblock level is '1' this implies disable deblocking for*/ + /* all edges of all macroblocks with out any restrictions */ + /* */ + /* if disable deblock level is '2' this implies enable deblocking for */ + /* all edges of all macroblocks except edges overlapping with slice */ + /* boundaries. This option is not currently supported by the encoder */ + /* hence the slice map should be of no significance to perform debloc */ + /* king */ + /**********************************************************************/ + error_status = ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr, + ps_cur_mv_buf->i4_buf_id, BUF_MGR_CODEC); + if(error_status != IH264E_SUCCESS) + { + return error_status; + } + error_status = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id, + BUF_MGR_CODEC); + if(error_status != IH264E_SUCCESS) + { + return error_status; + } + if(ps_codec->s_cfg.u4_enable_recon) + { + /* pic cnt */ + ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt; + + /* rec buffers */ + ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic; + + /* is last? 
*/ + ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last; + + /* frame time stamp */ + ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = + ps_proc->s_entropy.u4_timestamp_high; + ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = + ps_proc->s_entropy.u4_timestamp_low; + } + } + } + + DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y); + + return error_status; +} + +/** +******************************************************************************* +* +* @brief +* entry point of a spawned encoder thread +* +* @par Description: +* The encoder thread dequeues a proc/entropy job from the encoder queue and +* calls necessary routines. +* +* @param[in] pv_proc +* Process context corresponding to the thread +* +* @returns error status +* +* @remarks +* +******************************************************************************* +*/ +WORD32 isvce_process_thread(void *pv_proc) +{ + job_t s_job; + + isvce_process_ctxt_t *ps_proc = pv_proc; + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + IH264_ERROR_T ret = IH264_SUCCESS; + + WORD32 error_status = IH264_SUCCESS; + WORD32 is_blocking = 0; + + ps_proc->i4_error_code = IH264_SUCCESS; + + while(1) + { + /* dequeue a job from the entropy queue */ + { + WORD32 retval = ithread_mutex_lock(ps_codec->pv_entropy_mutex); + + /* codec context selector */ + WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS; + + volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel]; + + /* have the lock */ + if(retval == 0) + { + if(*pu4_buf == 0) + { + /* no entropy threads are active, try dequeuing a job from the entropy + * queue */ + ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking); + if(IH264_SUCCESS == ret) + { + *pu4_buf = 1; + ithread_mutex_unlock(ps_codec->pv_entropy_mutex); + goto WORKER; + } + else if(is_blocking) + { + ithread_mutex_unlock(ps_codec->pv_entropy_mutex); + break; + } + } + ithread_mutex_unlock(ps_codec->pv_entropy_mutex); + 
} + } + + /* dequeue a job from the process queue */ + ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1); + if(IH264_SUCCESS != ret) + { + if(ps_proc->i4_id) + break; + else + { + is_blocking = 1; + continue; + } + } + + WORKER: + /* choose appropriate proc context based on proc_base_idx */ + switch(s_job.i4_cmd) + { + case CMD_PROCESS: + { + ps_proc->i4_mb_cnt = s_job.i2_mb_cnt; + ps_proc->i4_mb_x = s_job.i2_mb_x; + ps_proc->i4_mb_y = s_job.i2_mb_y; + + isvce_init_layer_proc_ctxt(ps_proc); + + error_status = isvce_process(ps_proc); + + if(error_status != IH264_SUCCESS) + { + ps_proc->i4_error_code = error_status; + return ret; + } + + break; + } + case CMD_ENTROPY: + { + ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x; + ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y; + ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt; + + isvce_init_entropy_ctxt(ps_proc); + + error_status = isvce_entropy(ps_proc); + + if(error_status != IH264_SUCCESS) + { + ps_proc->i4_error_code = error_status; + return ret; + } + + break; + } + default: + { + ps_proc->i4_error_code = IH264_FAIL; + return ret; + } + } + } + + return ret; +} diff --git a/encoder/svc/isvce_process.h b/encoder/svc/isvce_process.h new file mode 100644 index 0000000..bacd324 --- /dev/null +++ b/encoder/svc/isvce_process.h @@ -0,0 +1,285 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_process.h +* +* @brief +* Contains functions for codec thread +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_PROCESS_H_ +#define _ISVCE_PROCESS_H_ + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief This function generates sps, pps set on request +* +* @par Description +* When the encoder is set in header generation mode, the following function +* is called. This generates sps and pps headers and returns the control back +* to caller. +* +* @param[in] ps_codec +* pointer to codec context +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T isvce_generate_sps_pps(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp_buf); + +/** +******************************************************************************* +* +* @brief initialize entropy context. +* +* @par Description: +* Before invoking the call to perform to entropy coding the entropy context +* associated with the job needs to be initialized. This involves the start +* mb address, end mb address, slice index and the pointer to location at +* which the mb residue info and mb header info are packed. 
+* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_init_entropy_ctxt(isvce_process_ctxt_t *ps_proc); + +/** +******************************************************************************* +* +* @brief entry point for entropy coding +* +* @par Description +* This function calls lower level functions to perform entropy coding for a +* group (n rows) of mb's. After encoding 1 row of mb's, the function takes +* back the control, updates the ctxt and calls lower level functions again. +* This process is repeated till all the rows or group of mb's (which ever is +* minimum) are coded +* +* @param[in] ps_proc +* process context +* +* @returns error status +* +* @remarks +* NOTE : It is assumed that this routine is invoked at the start of a slice, +* so the slice header is generated by default. +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_entropy(isvce_process_ctxt_t *ps_proc); + +/** +******************************************************************************* +* +* @brief Packs header information of a mb in to a buffer +* +* @par Description: +* After the deciding the mode info of a macroblock, the syntax elements +* associated with the mb are packed and stored. The entropy thread unpacks +* this buffer and generates the end bit stream. +* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_pack_header_data(isvce_process_ctxt_t *ps_proc); + +/** +******************************************************************************* +* +* @brief update process context after encoding an mb. 
This involves preserving +* the current mb information for later use and initializing the proc ctxt elements to +* encode the next mb. +* +* @par Description: +* This function performs housekeeping tasks after encoding an mb. +* After encoding an mb, various elements of the process context need to be +* updated to encode the next mb. For instance, the source, recon and reference +* pointers, mb indices have to be adjusted to the next mb. The slice index of +* the current mb needs to be updated. If mb qp modulation is enabled, then if +* the qp changes the quant param structure needs to be updated. Also, when +* encoding the next mb, the current mb info is used as part of mode prediction or +* mv prediction. Hence the current mb info has to be preserved at top/top left/left +* locations. +* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns WORD32 value; presumably an error status (confirm against the definition) +* +* @remarks none +* +******************************************************************************* +*/ +WORD32 isvce_update_proc_ctxt(isvce_process_ctxt_t *ps_proc); + +/** +******************************************************************************* +* +* @brief initialize process context. +* +* @par Description: +* Before dispatching the current job to process thread, the process context +* associated with the job is initialized. Usually every job aims to encode one +* row of mb's. Based on the row indices provided by the job, the process +* context's buffer ptrs, slice indices and other elements that are necessary +* during core-coding are initialized.
+* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_init_proc_ctxt(isvce_process_ctxt_t *ps_proc); + +/** +******************************************************************************* +* +* @brief This function performs luma & chroma padding +* +* @par Description: +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @param[in] pu1_curr_pic_luma +* Pointer to luma buffer +* +* @param[in] pu1_curr_pic_chroma +* Pointer to chroma buffer +* +* @param[in] i4_mb_x +* mb index x +* +* @param[in] i4_mb_y +* mb index y +* +* @param[in] i4_pad_ht +* number of rows to be padded +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_pad_recon_buffer(isvce_process_ctxt_t *ps_proc, UWORD8 *pu1_curr_pic_luma, + WORD32 i4_luma_stride, UWORD8 *pu1_curr_pic_chroma, + WORD32 i4_chroma_stride, WORD32 i4_mb_x, WORD32 i4_mb_y, + WORD32 i4_pad_ht); + +/** +******************************************************************************* +* +* @brief This function performs luma half pel planes generation +* +* @par Description: +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_halfpel_generation(isvce_process_ctxt_t *ps_proc, UWORD8 *pu1_curr_pic_luma, + WORD32 i4_mb_x, WORD32 i4_mb_y); + +/** +******************************************************************************* +* +* @brief This function performs luma & chroma core coding for a set of mb's. +* +* @par Description: +* The mb to be coded is taken and is evaluated over a predefined set of modes +* (intra (i16, i4, i8)/inter (mv, skip)) for best cost. 
The mode with least +*cost is selected and using intra/inter prediction filters, prediction is +*carried out. The deviation between src and pred signal constitutes error +*signal. This error signal is transformed (hierarchical transform if necessary) +*and quantized. The quantized residue is packed in to entropy buffer for entropy +*coding. This is repeated for all the mb's enlisted under the job. +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +WORD32 isvce_process(isvce_process_ctxt_t *ps_proc); + +/** +******************************************************************************* +* +* @brief +* entry point of a spawned encoder thread +* +* @par Description: +* The encoder thread dequeues a proc/entropy job from the encoder queue and +* calls necessary routines. +* +* @param[in] pv_proc +* Process context corresponding to the thread +* +* @returns error status +* +* @remarks +* +******************************************************************************* +*/ +WORD32 isvce_process_thread(void *pv_proc); + +#endif diff --git a/encoder/svc/isvce_rate_control.c b/encoder/svc/isvce_rate_control.c new file mode 100644 index 0000000..1562f08 --- /dev/null +++ b/encoder/svc/isvce_rate_control.c @@ -0,0 +1,716 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_rate_control.c +* +* @brief +* Contains api function definitions for h264 rate control +* +* @author +* ittiam +* +* @par List of Functions: +* - isvce_rc_init() +* - isvce_rc_get_picture_details() +* - isvce_update_rc_framerates() +* - isvce_update_rc_mb_info() +* - isvce_rc_get_buffer_status() +* - isvce_rc_post_enc() +* - isvce_update_rc_bits_info() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +#include "ih264_typedefs.h" +#include "irc_datatypes.h" +#include "iv2.h" +#include "ive2.h" +#include "isvce.h" +#include "isvc_defs.h" +#include "isvc_macros.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_common_tables.h" +#include "isvc_cabac_tables.h" +#include "isvce_defs.h" +#include "isvce_globals.h" +#include "irc_mem_req_and_acq.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "irc_rate_control_api.h" +#include "ih264e_time_stamp.h" +#include "ih264e_modify_frm_rate.h" +#include "isvce_rate_control.h" +#include "ih264e_error.h" +#include "ih264e_time_stamp.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "isvce_cabac_structs.h"
+#include "isvce_structs.h" +#include "ih264e_utils.h" +#include "irc_trace_support.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* This function initializes rate control context and variables +* +* @par Description +* This function initializes rate control type, source and target frame rate, +* average and peak bitrate, intra-inter frame interval and initial +* quantization parameter +* +* @param[in] pv_rc_api +* Handle to rate control api +* +* @param[in] pv_frame_time +* Handle to frame time context +* +* @param[in] pv_time_stamp +* Handle to time stamp context +* +* @param[in] pv_pd_frm_rate +* Handle to pull down frame time context +* +* @param[in] u4_max_frm_rate +* Maximum frame rate +* +* @param[in] u4_src_frm_rate +* Source frame rate +* +* @param[in] u4_tgt_frm_rate +* Target frame rate +* +* @param[in] e_rate_control_type +* Rate control type +* +* @param[in] u4_avg_bit_rate +* Average bit rate +* +* @param[in] u4_peak_bit_rate +* Peak bit rate +* +* @param[in] u4_max_delay +* Maximum delay between frames +* +* @param[in] u4_intra_frame_interval +* Intra frame interval +* +* @param[in] pu1_init_qp +* Initial qp +* +* @param[in] i4_max_inter_frm_int +* Maximum inter frame interval +* +* @param[in] pu1_min_max_qp +* Array of min/max qp +* +* @param[in] u1_profile_level +* Encoder profile level +* +* @returns none +* +* @remarks +* +******************************************************************************* +*/ +void isvce_rc_init(void *pv_rc_api, void *pv_frame_time, void *pv_time_stamp, void *pv_pd_frm_rate, + UWORD32 u4_max_frm_rate, UWORD32 u4_src_frm_rate, UWORD32 u4_tgt_frm_rate, + rc_type_e e_rate_control_type, UWORD32 u4_avg_bit_rate, UWORD32 u4_peak_bit_rate, + UWORD32 u4_max_delay, UWORD32 
u4_intra_frame_interval, WORD32 i4_inter_frm_int, + UWORD8 *pu1_init_qp, WORD32 i4_max_inter_frm_int, UWORD8 *pu1_min_max_qp, + UWORD8 u1_profile_level) +{ + // UWORD8 u1_is_mb_level_rc_on = 0; + UWORD32 au4_peak_bit_rate[2] = {0, 0}; + UWORD32 u4_min_bit_rate = 0; + WORD32 i4_is_gop_closed = 1; + // WORD32 i4_use_est_intra_sad = 1; + UWORD32 u4_src_ticks = 0; + UWORD32 u4_tgt_ticks = 0; + UWORD8 u1_level_idx = ih264e_get_lvl_idx(u1_profile_level); + UWORD32 u4_max_cpb_size = 1200 * gas_isvc_lvl_tbl[u1_level_idx].u4_max_cpb_size; + + /* Fill the params needed for the RC init */ + if(e_rate_control_type == CBR_NLDRC) + { + au4_peak_bit_rate[0] = u4_avg_bit_rate; + au4_peak_bit_rate[1] = u4_avg_bit_rate; + } + else + { + au4_peak_bit_rate[0] = u4_peak_bit_rate; + au4_peak_bit_rate[1] = u4_peak_bit_rate; + } + + /* Initialize frame time computation module*/ + ih264e_init_frame_time(pv_frame_time, u4_src_frm_rate, /* u4_src_frm_rate */ + u4_tgt_frm_rate); /* u4_tgt_frm_rate */ + + /* Initialize the pull_down frame rate */ + ih264e_init_pd_frm_rate(pv_pd_frm_rate, u4_src_frm_rate); /* u4_input_frm_rate */ + + /* Initialize time stamp structure */ + ih264e_init_time_stamp(pv_time_stamp, u4_max_frm_rate, /* u4_max_frm_rate */ + u4_src_frm_rate); /* u4_src_frm_rate */ + + u4_src_ticks = ih264e_frame_time_get_src_ticks(pv_frame_time); + u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks(pv_frame_time); + + /* Init max_inter_frame int */ + i4_max_inter_frm_int = (i4_inter_frm_int == 1) ? 
2 : (i4_inter_frm_int + 2); + + /* Initialize the rate control */ + irc_initialise_rate_control( + pv_rc_api, /* RC handle */ + e_rate_control_type, /* RC algo type */ + 0, /* MB activity on/off */ + u4_avg_bit_rate, /* Avg Bitrate */ + au4_peak_bit_rate, /* Peak bitrate array[2]:[I][P] */ + u4_min_bit_rate, /* Min Bitrate */ + u4_src_frm_rate, /* Src frame_rate */ + u4_max_delay, /* Max buffer delay */ + u4_intra_frame_interval, /* Intra frm_interval */ + i4_inter_frm_int, /* Inter frame interval */ + pu1_init_qp, /* Init QP array[3]:[I][P][B] */ + u4_max_cpb_size, /* Max VBV/CPB Buffer Size */ + i4_max_inter_frm_int, /* Max inter frm_interval */ + i4_is_gop_closed, /* Open/Closed GOP */ + pu1_min_max_qp, /* Min-max QP + array[6]:[Imax][Imin][Pmax][Pmin][Bmax][Bmin] */ + 0, /* How to calc the I-frame estimated_sad */ + u4_src_ticks, /* Src_ticks = LCM(src_frm_rate,tgt_frm_rate)/src_frm_rate + */ + u4_tgt_ticks); /* Tgt_ticks = LCM(src_frm_rate,tgt_frm_rate)/tgt_frm_rate + */ +} + +/** +******************************************************************************* +* +* @brief Function to get picture details +* +* @par Description +* This function returns the Picture type(I/P/B) +* +* @param[in] pv_rc_api +* Handle to Rate control api +* +* @returns +* Picture type +* +* @remarks pi4_pic_id and pi4_pic_disp_order_no are passed through to irc_get_picture_details +* +******************************************************************************* +*/ +picture_type_e isvce_rc_get_picture_details(void *pv_rc_api, WORD32 *pi4_pic_id, + WORD32 *pi4_pic_disp_order_no) +{ + picture_type_e e_rc_pic_type = P_PIC; + + irc_get_picture_details(pv_rc_api, pi4_pic_id, pi4_pic_disp_order_no, &e_rc_pic_type); + + return (e_rc_pic_type); +} + +/** +******************************************************************************* +* +* @brief Function to get rate control output before encoding +* +* @par Description +* This function is called before queuing the current frame. It decides if we +*should skip the current input buffer due to frame rate mismatch.
It also updates +*RC about the achievable frame rate +* +* @param[in] ps_rate_control_api +* Handle to rate control api +* +* @param[in] ps_pd_frm_rate +* Handle to pull down frm rate context +* +* @param[in] ps_time_stamp +* Handle to time stamp context +* +* @param[in] ps_frame_time +* Handle to frame time context +* +* @param[in] i4_delta_time_stamp +* Time stamp difference between frames +* +* @param[in] i4_total_mb_in_frame +* Total Macro Blocks in frame +* +* @param[in/out] pe_vop_coding_type +* Picture coding type(I/P/B) +* +* @param[in/out] pu1_frame_qp +* QP for current frame +* +* @returns +* Non-zero if the current frame is to be skipped, 0 otherwise +* +* @remarks +* The i4_delta_time_stamp, i4_total_mb_in_frame, pe_vop_coding_type and pu1_frame_qp entries above are stale: they are not parameters of this function +******************************************************************************* +*/ +WORD32 isvce_update_rc_framerates(void *ps_rate_control_api, void *ps_pd_frm_rate, + void *ps_time_stamp, void *ps_frame_time) +{ + WORD8 i4_skip_src = 0; + UWORD32 u4_src_not_skipped_for_dts = 0; + + /* Update the time stamp for the current frame */ + ih264e_update_time_stamp(ps_time_stamp); + + /* Check if the source frame needs to be skipped */ + i4_skip_src = ih264e_should_src_be_skipped(ps_frame_time, 1, &u4_src_not_skipped_for_dts); + + if(i4_skip_src) + { + /*********************************************************************** + *Based on difference in source and target frame rate frames are skipped + ***********************************************************************/ + /*update the missing frames frm_rate with 0 */ + ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0); + } + else + { + WORD32 i4_avg_frm_rate, i4_source_frame_rate; + + i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time); + + /* Update the frame rate of the frame present with the source frame rate */ + /* If the frm was not skipped due to delta_time_stamp, update the + frame_rate with double the tgt_frame_rate value, so that it makes + up for one of the frames skipped by the application */ + ih264e_update_pd_frm_rate(ps_pd_frm_rate, 
i4_source_frame_rate); + + /* Based on the update get the average frame rate */ + i4_avg_frm_rate = ih264e_get_pd_avg_frm_rate(ps_pd_frm_rate); + + /* Call the RC library function to change the frame_rate to the + actually achieved frm_rate */ + irc_change_frm_rate_for_bit_alloc(ps_rate_control_api, i4_avg_frm_rate); + } + + return (i4_skip_src); +} + +/** +******************************************************************************* +* +* @brief Function to update mb info for rate control context +* +* @par Description +* After encoding a mb, information such as mb type, qp used, mb distortion +* resulted in encoding the block and so on needs to be preserved for modeling +* RC. This is preserved via this function call. +* +* @param[in] ps_frame_info +* Handle to frame info context +* +* @param[in] pv_proc +* Process context (used as isvce_process_ctxt_t *) +* +* @returns none +* +* @remarks +* +******************************************************************************* +*/ +void isvce_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc) +{ + /* proc ctxt */ + isvce_process_ctxt_t *ps_proc = pv_proc; + + /* accumulator index: 0 for an intra mb, 1 for an inter mb */ + WORD32 mb_type = !ps_proc->ps_mb_info->u1_is_intra; + + /* distortion */ + ps_frame_info->tot_mb_sad[mb_type] += ps_proc->i4_mb_distortion; + + /* qp, mapped through gau1_h264_to_mpeg2_qmap before accumulation */ + ps_frame_info->qp_sum[mb_type] += gau1_h264_to_mpeg2_qmap[ps_proc->u1_mb_qp]; + + /* mb cnt */ + ps_frame_info->num_mbs[mb_type]++; + + /* cost is accumulated for intra mbs only */ + if(ps_proc->ps_mb_info->u1_is_intra) + { + ps_frame_info->intra_mb_cost_sum += ps_proc->i4_mb_cost; + } +} + +/** +******************************************************************************* +* +* @brief Function to get rate control buffer status +* +* @par Description +* This function is used to get buffer status(underflow/overflow) by rate +* control module +* +* @param[in] pv_rc_api +* Handle to rate control api context +* +* @param[in] i4_total_frame_bits +* Total frame bits +* +* @param[in] e_pic_type +* Picture type +* +* @param[in] 
pi4_num_bits_to_prevent_vbv_underflow +* Number of bits to prevent underflow +* +* @param[out] pu1_is_enc_buf_overflow +* Set to 1 when irc_get_buffer_status reports VBV_UNDERFLOW, else 0 +* +* @param[out] pu1_is_enc_buf_underflow +* Set to 1 when irc_get_buffer_status reports VBV_OVERFLOW, else 0 +* +* @returns none +* +* @remarks +* The flags are inverted with respect to the VBV status, presumably because they describe the encoder buffer rather than the VBV (confirm) +******************************************************************************* +*/ +void isvce_rc_get_buffer_status(void *pv_rc_api, WORD32 i4_total_frame_bits, + picture_type_e e_pic_type, + WORD32 *pi4_num_bits_to_prevent_vbv_underflow, + UWORD8 *pu1_is_enc_buf_overflow, UWORD8 *pu1_is_enc_buf_underflow) +{ + vbv_buf_status_e e_vbv_buf_status = VBV_NORMAL; + + e_vbv_buf_status = irc_get_buffer_status(pv_rc_api, i4_total_frame_bits, e_pic_type, + pi4_num_bits_to_prevent_vbv_underflow); + + if(e_vbv_buf_status == VBV_OVERFLOW) + { + *pu1_is_enc_buf_underflow = 1; + *pu1_is_enc_buf_overflow = 0; + } + else if(e_vbv_buf_status == VBV_UNDERFLOW) + { + *pu1_is_enc_buf_underflow = 0; + *pu1_is_enc_buf_overflow = 1; + } + else + { + *pu1_is_enc_buf_underflow = 0; + *pu1_is_enc_buf_overflow = 0; + } +} + +/** +******************************************************************************* +* +* @brief Function to update rate control module after encoding +* +* @par Description +* This function is used to update the rate control module after the current +* frame encoding is done with details such as bits consumed, SAD for I/P/B, +* intra cost, mb type and others +* +* @param[in] ps_rate_control_api +* Handle to rate control api context +* +* @param[in] ps_frame_info +* Handle to frame info context +* +* @param[in] ps_pd_frm_rate +* Handle to pull down frame rate context +* +* @param[in] ps_time_stamp +* Handle to time stamp context +* +* @param[in] ps_frame_time +* Handle to frame time context +* +* @param[in] i4_total_mb_in_frame +* Total mb in frame +* +* @param[in] pe_vop_coding_type +* Picture coding type +* +* @param[in] i4_is_first_frame +* Is first frame +* +* @param[out] pi4_is_post_encode_skip +* Post 
encoding skip flag +* +* @param[in] u1_frame_qp +* Frame qp +* +* @param[in/out] pi4_num_intra_in_prev_frame +* Number of intra mbs in previous frame +* +* @param[out] pi4_avg_activity +* Average activity +* +* @returns number of bytes to stuff (i4_cbr_bits_to_stuff >> 3) +* +* @remarks +* +******************************************************************************* +*/ +WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info, + void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time, + WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type, + WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip, + UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame, + WORD32 *pi4_avg_activity +#if ENABLE_RE_ENC_AS_SKIP + , + UWORD8 *u1_is_post_enc_skip +#endif +) +{ + /* Variables for the update_frm_level_info */ + WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE]; + WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0}; + WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0}; + WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0}; + WORD32 i4_total_frame_bits = 0; + WORD32 i4_total_hdr_bits = 0; + WORD32 i4_total_texturebits; + WORD32 i4_avg_mb_activity = 0; + WORD32 i4_intra_frm_cost = 0; + UWORD8 u1_is_scd = 0; + WORD32 i4_cbr_bits_to_stuff = 0; + UWORD32 u4_num_intra_in_prev_frame = *pi4_num_intra_in_prev_frame; + + UNUSED(ps_pd_frm_rate); + UNUSED(ps_time_stamp); + UNUSED(ps_frame_time); + UNUSED(u1_frame_qp); + UNUSED(i4_is_first_frame); + /* Accumulate RC stats */ + ai4_tot_mb_in_type[MB_TYPE_INTRA] = irc_fi_get_total_mb(ps_frame_info, MB_TYPE_INTRA); + ai4_tot_mb_in_type[MB_TYPE_INTER] = irc_fi_get_total_mb(ps_frame_info, MB_TYPE_INTER); + ai4_tot_mb_type_qp[MB_TYPE_INTRA] = irc_fi_get_total_mb_qp(ps_frame_info, MB_TYPE_INTRA); + ai4_tot_mb_type_qp[MB_TYPE_INTER] = irc_fi_get_total_mb_qp(ps_frame_info, MB_TYPE_INTER); + ai4_mb_type_sad[MB_TYPE_INTRA] = irc_fi_get_total_mb_sad(ps_frame_info, MB_TYPE_INTRA); + ai4_mb_type_sad[MB_TYPE_INTER] = irc_fi_get_total_mb_sad(ps_frame_info, MB_TYPE_INTER); + 
i4_intra_frm_cost = irc_fi_get_total_intra_mb_cost(ps_frame_info); + i4_avg_mb_activity = irc_fi_get_avg_activity(ps_frame_info); + i4_total_hdr_bits = irc_fi_get_total_header_bits(ps_frame_info); + i4_total_texturebits = irc_fi_get_total_mb_texture_bits(ps_frame_info, MB_TYPE_INTRA); + i4_total_texturebits += irc_fi_get_total_mb_texture_bits(ps_frame_info, MB_TYPE_INTER); + i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits; + + *pi4_avg_activity = i4_avg_mb_activity; + + /* Texture bits are not accumulated. Hence subtracting hdr bits from total + * bits */ + ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0; + ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits; + + /* Set post encode skip to zero */ + pi4_is_post_encode_skip[0] = 0; + + /* For NLDRC, get the buffer status for stuffing or skipping */ + if(irc_get_rc_type(ps_rate_control_api) == CBR_NLDRC) + { + WORD32 i4_get_num_bit_to_prevent_vbv_overflow; + UWORD8 u1_enc_buf_overflow, u1_enc_buf_underflow; + + /* Getting the buffer status */ + isvce_rc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits, pe_vop_coding_type[0], + &i4_get_num_bit_to_prevent_vbv_overflow, &u1_enc_buf_overflow, + &u1_enc_buf_underflow); + + /* We skip the frame if decoder buffer is underflowing. 
But we never skip + * first I frame */ +#if !DISABLE_POST_ENC_SKIP + if((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 1)) + // if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0)) + { + irc_post_encode_frame_skip(ps_rate_control_api, (picture_type_e) pe_vop_coding_type[0]); + // i4_total_frame_bits = imp4_write_skip_frame_header(ps_enc); + i4_total_frame_bits = 0; + + *pi4_is_post_encode_skip = 1; + + /* Adjust the GOP if in case we skipped an I-frame */ + if(*pe_vop_coding_type == I_PIC) irc_force_I_frame(ps_rate_control_api); + + /* Since this frame is skipped by writing 7 bytes header, we say this is a + * P frame */ + // *pe_vop_coding_type = P; + + /* Getting the buffer status again,to check if it underflows */ + irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits, + (picture_type_e) pe_vop_coding_type[0], + &i4_get_num_bit_to_prevent_vbv_overflow); + } +#endif + +#if ENABLE_RE_ENC_AS_SKIP + /* Check for VBV constraints - post encode skip */ + if(u1_enc_buf_overflow == 1 && (pe_vop_coding_type[0] != I_PIC)) + { + *u1_is_post_enc_skip = 1; + + ai4_tot_mb_in_type[MB_TYPE_INTER] += ai4_tot_mb_in_type[MB_TYPE_INTRA]; + ai4_tot_mb_in_type[MB_TYPE_INTRA] = 0; + ai4_tot_mb_type_qp[MB_TYPE_INTER] += ai4_tot_mb_type_qp[MB_TYPE_INTRA]; + ai4_tot_mb_type_qp[MB_TYPE_INTRA] = 0; + + ai4_mb_type_sad[MB_TYPE_INTER] += ai4_mb_type_sad[MB_TYPE_INTRA]; + ai4_mb_type_sad[MB_TYPE_INTRA] = 0; + + i4_intra_frm_cost = 0; + + i4_total_hdr_bits = 0; + i4_total_texturebits = 0; + i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits; + + ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0; + ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits; + + /* Getting the buffer status again,to check if it underflows */ + irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits, + (picture_type_e) pe_vop_coding_type[0], + &i4_get_num_bit_to_prevent_vbv_overflow); + } +#endif + + /* In this case we stuff bytes as buffer is overflowing */ + 
if(u1_enc_buf_underflow == 1) + { + /* The stuffing function is directly pulled out from split controller + workspace. encode_vop_data() function makes sure alignment data is + dumped at the end of a frame. Split controller was identifying this + alignment byte, overwriting it with the stuff data and then finally + aligning the buffer. Here every thing is inside the DSP. So, ideally + encode_vop_data needn't align, and we can start stuffing directly. But + in that case, it'll break the logic for a normal frame. Hence for + simplicity, not changing this part since it is ok to align and then + overwrite since stuffing is not done for every frame */ + i4_cbr_bits_to_stuff = irc_get_bits_to_stuff(ps_rate_control_api, i4_total_frame_bits, + pe_vop_coding_type[0]); + + /* Just add extra 32 bits to make sure we don't stuff too few */ + i4_cbr_bits_to_stuff += 32; + + /* NOTE(review): stuffing must not exceed the outbuf size, but no check is done here */ + /* Add stuffed bits to total bits */ + i4_total_frame_bits += i4_cbr_bits_to_stuff; + } + } + + /* A P frame with more than 2/3rd of its MBs intra, and more than 1.1x the + * previous frame's intra MB count, is treated as a scene change */ + if((ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((2 * i4_total_mb_in_frame) / 3)) && + (*pe_vop_coding_type == P_PIC) && + (ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((11 * (WORD32) u4_num_intra_in_prev_frame) / 10))) + { + u1_is_scd = 1; + } + + /* Update num intra mbs of this frame */ + if(pi4_is_post_encode_skip[0] == 0) + { + *pi4_num_intra_in_prev_frame = ai4_tot_mb_in_type[MB_TYPE_INTRA]; + } + + /* Reset intra count to zero, if you encounter an I frame */ + if(*pe_vop_coding_type == I_PIC) + { + *pi4_num_intra_in_prev_frame = 0; + } + + /* Do an update of rate control after post encode */ + irc_update_frame_level_info(ps_rate_control_api, /* RC state */ + pe_vop_coding_type[0], /* PIC type */ + ai4_mb_type_sad, /* SAD for [Intra/Inter] */ + i4_total_frame_bits, /* Total frame bits */ + i4_total_hdr_bits, /* Header bits */ + ai4_mb_type_tex_bits, /* Texture bits for 
MB[Intra/Inter] */ + ai4_tot_mb_type_qp, /* QP sum for MB[Intra/Inter] */ + ai4_tot_mb_in_type, /* MB count for MB[Intra/Inter] */ + i4_avg_mb_activity, /* Average mb activity in frame */ + u1_is_scd, /* Is a scene change detected */ + 0, /* Pre encode skip */ + (WORD32) i4_intra_frm_cost, /* Intra cost for frame */ + 0); /* Not done outside */ + + return (i4_cbr_bits_to_stuff >> 3); +} + +/** +******************************************************************************* +* +* @brief Function to update bits consumed info to rate control context +* +* @par Description +* Adds the intra/inter header and residue bit counts of the entropy context to the frame info accumulators +* +* @param[in] ps_frame_info +* Frame info context +* +* @param[in] pv_entropy +* Entropy context (used as isvce_entropy_ctxt_t *) +* +* @returns +* none +* +* @remarks +* +******************************************************************************* +*/ +void isvce_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy) +{ + isvce_entropy_ctxt_t *ps_entropy = pv_entropy; + + ps_frame_info->mb_header_bits[MB_TYPE_INTRA] += ps_entropy->u4_header_bits[MB_TYPE_INTRA]; + + ps_frame_info->mb_texture_bits[MB_TYPE_INTRA] += ps_entropy->u4_residue_bits[MB_TYPE_INTRA]; + + ps_frame_info->mb_header_bits[MB_TYPE_INTER] += ps_entropy->u4_header_bits[MB_TYPE_INTER]; + + ps_frame_info->mb_texture_bits[MB_TYPE_INTER] += ps_entropy->u4_residue_bits[MB_TYPE_INTER]; + + return; +} diff --git a/encoder/svc/isvce_rate_control.h b/encoder/svc/isvce_rate_control.h new file mode 100644 index 0000000..26e0a98 --- /dev/null +++ b/encoder/svc/isvce_rate_control.h @@ -0,0 +1,330 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_rate_control.h +* +* @brief +* This file contains function declarations of api functions for h264 rate +* control +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_RATE_CONTROL_H_ +#define _ISVCE_RATE_CONTROL_H_ + +#if ENABLE_RE_ENC_AS_SKIP +#include "isvce_structs.h" +#endif + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* This function initializes rate control context and variables +* +* @par Description +* This function initializes rate control type, source and target frame rate, +* average and peak bitrate, intra-inter frame interval and initial +* quantization parameter +* +* @param[in] pv_rc_api +* Handle to rate control api +* +* @param[in] pv_frame_time +* Handle to frame time context +* +* @param[in] pv_time_stamp +* Handle to time stamp context +* +* @param[in] pv_pd_frm_rate +* Handle to pull down frame time context +* +* @param[in] u4_max_frm_rate +* Maximum frame rate +* +* @param[in] u4_src_frm_rate +* Source frame rate 
/**
*******************************************************************************
*
* @brief
*  This function initializes rate control context and variables
*
* @par Description
*  This function initializes rate control type, source and target frame rate,
*  average and peak bitrate, intra-inter frame interval and initial
*  quantization parameter
*
* @param[in] pv_rc_api
*  Handle to rate control api
*
* @param[in] pv_frame_time
*  Handle to frame time context
*
* @param[in] pv_time_stamp
*  Handle to time stamp context
*
* @param[in] pv_pd_frm_rate
*  Handle to pull down frame time context
*
* @param[in] u4_max_frm_rate
*  Maximum frame rate
*
* @param[in] u4_src_frm_rate
*  Source frame rate
*
* @param[in] u4_tgt_frm_rate
*  Target frame rate
*
* @param[in] e_rate_control_type
*  Rate control type
*
* @param[in] u4_avg_bit_rate
*  Average bit rate
*
* @param[in] u4_peak_bit_rate
*  Peak bit rate
*
* @param[in] u4_max_delay
*  Maximum delay between frames
*
* @param[in] u4_intra_frame_interval
*  Intra frame interval
*
* @param[in] i4_inter_frm_int
*  Inter frame interval
*
* @param[in] pu1_init_qp
*  Initial qp (passed by pointer; the number of entries read is not visible
*  from this declaration - TODO confirm against the definition)
*
* @param[in] i4_max_inter_frm_int
*  Maximum inter frame interval
*
* @param[in] pu1_min_max_qp
*  Array of min/max qp
*
* @param[in] u1_profile_level
*  Encoder profile level
*
* @returns none
*
* @remarks
*
*******************************************************************************
*/
void isvce_rc_init(void *pv_rc_api, void *pv_frame_time, void *pv_time_stamp, void *pv_pd_frm_rate,
                   UWORD32 u4_max_frm_rate, UWORD32 u4_src_frm_rate, UWORD32 u4_tgt_frm_rate,
                   rc_type_e e_rate_control_type, UWORD32 u4_avg_bit_rate, UWORD32 u4_peak_bit_rate,
                   UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, WORD32 i4_inter_frm_int,
                   UWORD8 *pu1_init_qp, WORD32 i4_max_inter_frm_int, UWORD8 *pu1_min_max_qp,
                   UWORD8 u1_profile_level);

/**
*******************************************************************************
*
* @brief Function to get picture details
*
* @par Description
*  This function returns the Picture type(I/P/B)
*
* @param[in] pv_rc_api
*  Handle to Rate control api
*
* @param[out] pi4_pic_id
*  NOTE(review): presumably receives the id of the picture - the direction and
*  meaning are inferred from the name only; confirm against the definition
*
* @param[out] pi4_pic_disp_order_no
*  NOTE(review): presumably receives the display order number of the picture -
*  confirm against the definition
*
* @returns
*  Picture type
*
* @remarks none
*
*******************************************************************************
*/
picture_type_e isvce_rc_get_picture_details(void *pv_rc_api, WORD32 *pi4_pic_id,
                                            WORD32 *pi4_pic_disp_order_no);
/**
*******************************************************************************
*
* @brief Function to set frame rate inside RC.
*
* @par Description
*  NOTE(review): the text previously attached to this declaration read "This
*  function is called before encoding the current frame and gets the qp for
*  the current frame from rate control module" and documented the return value
*  as "Skip or encode the current frame". Neither matches the brief or the
*  function name, so both look carried over from another function; confirm the
*  actual behaviour against the definition.
*
* @param[in] ps_rate_control_api
*  Handle to rate control api
*
* @param[in] ps_pd_frm_rate
*  Handle to pull down frm rate context
*
* @param[in] ps_time_stamp
*  Handle to time stamp context
*
* @param[in] ps_frame_time
*  Handle to frame time context
*
* @returns
*  WORD32 value; its meaning is not visible from this declaration (see the
*  review note above)
*
* @remarks
*  All handles are passed as void pointers.
*
*******************************************************************************
*/
WORD32 isvce_update_rc_framerates(void *ps_rate_control_api, void *ps_pd_frm_rate,
                                  void *ps_time_stamp, void *ps_frame_time);
/**
*******************************************************************************
*
* @brief Function to update mb info for rate control context
*
* @par Description
*  After encoding a mb, information such as mb type, qp used, mb distortion
*  resulted in encoding the block and so on needs to be preserved for modelling
*  RC. This is preserved via this function call.
*
* @param[in] ps_frame_info
*  Handle Frame info context
*
* @param[in] pv_proc
*  Process context (passed as a void pointer)
*
* @returns none
*
* @remarks
*
*******************************************************************************
*/
void isvce_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc);

/**
*******************************************************************************
*
* @brief Function to get rate control buffer status
*
* @par Description
*  This function is used to get buffer status(underflow/overflow) by rate
*  control module
*
* @param[in] pv_rc_api
*  Handle to rate control api context
*
* @param[in] i4_total_frame_bits
*  Total frame bits
*
* @param[in] e_pic_type
*  Picture type
*
* @param[in] pi4_num_bits_to_prevent_vbv_underflow
*  Number of bits to prevent underflow
*  NOTE(review): passed by pointer, so presumably written by the callee -
*  confirm the direction against the definition
*
* @param[out] pu1_is_enc_buf_overflow
*  Buffer overflow indication flag
*
* @param[out] pu1_is_enc_buf_underflow
*  Buffer underflow indication flag
*
* @returns none
*
* @remarks
*
*******************************************************************************
*/
void isvce_rc_get_buffer_status(void *pv_rc_api, WORD32 i4_total_frame_bits,
                                picture_type_e e_pic_type,
                                WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
                                UWORD8 *pu1_is_enc_buf_overflow, UWORD8 *pu1_is_enc_buf_underflow);
/**
*******************************************************************************
*
* @brief Function to update rate control module after encoding
*
* @par Description
*  This function is used to update the rate control module after the current
*  frame encoding is done with details such as bits consumed, SAD for I/P/B,
*  intra cost ,mb type and other
*
* @param[in] ps_rate_control_api
*  Handle to rate control api context
*
* @param[in] ps_frame_info
*  Handle to frame info context
*
* @param[in] ps_pd_frm_rate
*  Handle to pull down frame rate context
*
* @param[in] ps_time_stamp
*  Handle to time stamp context
*
* @param[in] ps_frame_time
*  Handle to frame time context
*
* @param[in] i4_total_mb_in_frame
*  Total mb in frame
*
* @param[in] pe_vop_coding_type
*  Picture coding type
*
* @param[in] i4_is_first_frame
*  Is first frame
*
* @param[in] pi4_is_post_encode_skip
*  Post encoding skip flag
*
* @param[in] u1_frame_qp
*  Frame qp
*
* @param[in] pi4_num_intra_in_prev_frame
*  Number of intra mbs in previous frame
*
* @param[in] pi4_avg_activity
*  Average activity
*
* @param u1_is_post_enc_skip
*  Only present when ENABLE_RE_ENC_AS_SKIP is non-zero. Pointer to a UWORD8
*  despite the u1_ prefix.
*  NOTE(review): direction and meaning are not visible from this declaration;
*  presumably a post-encode skip indication - confirm against the definition
*
* @returns
*  WORD32 value; its meaning is not visible from this declaration
*
* @remarks
*  Several parameters documented as inputs are pointers; whether the callee
*  writes through them cannot be told from here.
*
*******************************************************************************
*/
#if ENABLE_RE_ENC_AS_SKIP
WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
                         void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
                         WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
                         WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
                         UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
                         WORD32 *pi4_avg_activity, UWORD8 *u1_is_post_enc_skip);
#else
WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
                         void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
                         WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
                         WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
                         UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
                         WORD32 *pi4_avg_activity);

#endif
/**
*******************************************************************************
*
* @brief Function to update bits consumed info to rate control context
*
* @par Description
*  Function to update bits consume info to rate control context
*
* @param[in] ps_frame_info
*  Frame info context
*
* @param[in] pv_entropy
*  Entropy context (passed as a void pointer)
*
* @returns none
*
* @remarks
*  The function is declared void; the bits information is not handed back
*  through the return value.
*
*******************************************************************************
*/
void isvce_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy);

/* closes the _ISVCE_RATE_CONTROL_H_ include guard */
#endif
a/encoder/svc/isvce_rc_mem_interface.c b/encoder/svc/isvce_rc_mem_interface.c new file mode 100644 index 0000000..b021781 --- /dev/null +++ b/encoder/svc/isvce_rc_mem_interface.c @@ -0,0 +1,325 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file +* isvce_rc_mem_interface.c +* +* @brief +* This file contains api function definitions for rate control memtabs +* +* @author +* ittiam +* +* List of Functions +* - fill_memtab() +* - use_or_fill_base() +* - isvce_map_rc_mem_recs_to_itt_api() +* - isvce_map_itt_mem_rec_to_rc_mem_rec() +* - isvce_get_rate_control_mem_tab() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include +#include +#include +#include + +/* User Include Files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "ih264_size_defs.h" +#include "iv2.h" +#include "ive2.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "isvce.h" +#include "ithread.h" +#include "isvc_defs.h" +#include "ih264_debug.h" +#include "isvc_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_common_tables.h" +#include "ih264_list.h" +#include "isvc_cabac_tables.h" +#include "ih264e_error.h" +#include "isvce_defs.h" +#include "ih264e_bitstream.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "ih264e_master.h" +#include "ih264_buf_mgr.h" +#include "ih264_dpb_mgr.h" +#include "isvce_utils.h" +#include "ih264e_platform_macros.h" +#include 
"ih264_cavlc_tables.h" +#include "ih264e_statistics.h" +#include "ih264e_trace.h" +#include "ih264e_fmt_conv.h" +#include "isvce_cavlc.h" +#include "ih264e_rc_mem_interface.h" +#include "isvce_rc_mem_interface.h" +#include "ih264e_time_stamp.h" +#include "irc_common.h" +#include "irc_rd_model.h" +#include "irc_est_sad.h" +#include "irc_fixed_point_error_bits.h" +#include "irc_vbr_storage_vbv.h" +#include "irc_picture_type.h" +#include "irc_bit_allocation.h" +#include "irc_mb_model_based.h" +#include "irc_cbr_buffer_control.h" +#include "irc_vbr_str_prms.h" +#include "irc_rate_control_api.h" +#include "irc_rate_control_api_structs.h" +#include "ih264e_modify_frm_rate.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief This function maps rc mem records structure to encoder lib mem records +* structure +* +* @par Description +* This function maps rc mem records structure to encoder lib mem records +* structure +* +* @param[in] ps_mem +* pointer to encoder lib mem records +* +* @param[in] rc_memtab +* pointer to rc mem records +* +* @param[in] num_mem_recs +* number of memory records +* +* @return void +* +****************************************************************************** +*/ +static void isvce_map_rc_mem_recs_to_itt_api(iv_mem_rec_t *ps_mem, itt_memtab_t *rc_memtab, + UWORD32 num_mem_recs) +{ + UWORD32 j; + UWORD32 Size, align; + + for(j = 0; j < num_mem_recs; j++) + { + Size = rc_memtab->u4_size; + align = rc_memtab->i4_alignment; + + /* we always ask for external persistent cacheable memory */ + FILL_MEMTAB(ps_mem, j, Size, align, IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM); + + rc_memtab++; + } +} + +/** +******************************************************************************* +* +* @brief This function maps 
encoder lib mem records structure to RC memory +* records structure +* +* @par Description +* This function maps encoder lib mem records structure to RC memory +* records structure +* +* @param[in] ps_mem +* pointer to encoder lib mem records +* +* @param[in] rc_memtab +* pointer to rc mem records +* +* @param[in] num_mem_recs +* Number of memory records + +* @returns none +* +* @remarks +* +******************************************************************************* +*/ +static void isvce_map_itt_mem_rec_to_rc_mem_rec(iv_mem_rec_t *ps_mem, itt_memtab_t *rc_memtab, + UWORD32 num_mem_recs) +{ + UWORD32 i; + + for(i = 0; i < num_mem_recs; i++) + { + rc_memtab->i4_alignment = ps_mem->u4_mem_alignment; + rc_memtab->u4_size = ps_mem->u4_mem_size; + rc_memtab->pv_base = ps_mem->pv_base; + + /* only DDR memory is available */ + rc_memtab->e_mem_region = DDR; + rc_memtab->e_usage = PERSISTENT; + + rc_memtab++; + ps_mem++; + } +} + +/** +****************************************************************************** +* +* @brief Get memtabs for rate control +* +* @par Description +* This routine is used to Get/init memtabs for rate control +* +* @param[in] pv_rate_control +* pointer to rate control context (handle) +* +* @param[in] ps_mem +* pointer to encoder lib mem records +* +* @param[in] e_func_type +* enum that dictates fill memory records or Init memory records +* +* @return total number of mem records +* +****************************************************************************** +*/ +WORD32 isvce_get_rate_control_mem_tab(void *pv_rate_control, iv_mem_rec_t *ps_mem, + ITT_FUNC_TYPE_E e_func_type) +{ + itt_memtab_t as_itt_memtab[NUM_SVCE_RC_MEMTABS]; + WORD32 i4_num_memtab = 0, j = 0; + void *refptr2[RC_MEM_CNT]; + void **refptr1[RC_MEM_CNT]; + isvce_rate_control_ctxt_t *ps_rate_control = pv_rate_control; + + for(j = 0; j < RC_MEM_CNT; j++) refptr1[j] = &(refptr2[j]); + + j = 0; + + if(e_func_type == USE_BASE || e_func_type == FILL_BASE) + { + 
refptr1[RC_MEM_FRAME_TIME] = &ps_rate_control->pps_frame_time; + refptr1[RC_MEM_TIME_STAMP] = &ps_rate_control->pps_time_stamp; + refptr1[RC_MEM_FRAME_RATE] = &ps_rate_control->pps_pd_frm_rate; + refptr1[RC_MEM_API_L0] = &ps_rate_control->apps_rate_control_api[0]; + refptr1[RC_MEM_API_L1] = &ps_rate_control->apps_rate_control_api[1]; + refptr1[RC_MEM_API_L2] = &ps_rate_control->apps_rate_control_api[2]; + } + + /* Get the total number of memtabs used by Frame time Module */ + i4_num_memtab = ih264e_frame_time_get_init_free_memtab( + (frame_time_t **) refptr1[RC_MEM_FRAME_TIME], NULL, GET_NUM_MEMTAB); + /* Few extra steps during init */ + isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + /* Fill the memtabs used by Frame time Module */ + i4_num_memtab = ih264e_frame_time_get_init_free_memtab( + (frame_time_t **) refptr1[RC_MEM_FRAME_TIME], as_itt_memtab + j, e_func_type); + /* Mapping ittiam memtabs to App. memtabs */ + isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + j += i4_num_memtab; + + /* Get the total number of memtabs used by Time stamp Module */ + i4_num_memtab = ih264e_time_stamp_get_init_free_memtab( + (time_stamp_t **) refptr1[RC_MEM_TIME_STAMP], NULL, GET_NUM_MEMTAB); + /* Few extra steps during init */ + isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + /* Fill the memtabs used by Time Stamp Module */ + i4_num_memtab = ih264e_time_stamp_get_init_free_memtab( + (time_stamp_t **) refptr1[RC_MEM_TIME_STAMP], as_itt_memtab + j, e_func_type); + /* Mapping ittiam memtabs to App. 
memtabs */ + isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + j += i4_num_memtab; + + /* Get the total number of memtabs used by Frame rate Module */ + i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab( + (pd_frm_rate_t **) refptr1[RC_MEM_FRAME_RATE], NULL, GET_NUM_MEMTAB); + /* Few extra steps during init */ + isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + /* Fill the memtabs used by Frame Rate Module */ + i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab( + (pd_frm_rate_t **) refptr1[RC_MEM_FRAME_RATE], as_itt_memtab + j, e_func_type); + /* Mapping ittiam memtabs to App. memtabs */ + isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + j += i4_num_memtab; + + /* Get the total number of memtabs used by Rate Controller */ + i4_num_memtab = irc_rate_control_num_fill_use_free_memtab( + (rate_control_api_t **) refptr1[RC_MEM_API_L0], NULL, GET_NUM_MEMTAB); + /* Few extra steps during init */ + isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + /* Fill the memtabs used by Rate Controller */ + i4_num_memtab = irc_rate_control_num_fill_use_free_memtab( + (rate_control_api_t **) refptr1[RC_MEM_API_L0], as_itt_memtab + j, e_func_type); + /* Mapping ittiam memtabs to App. 
memtabs */ + isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + j += i4_num_memtab; + + /* Get the total number of memtabs used by Rate Controller */ + i4_num_memtab = irc_rate_control_num_fill_use_free_memtab( + (rate_control_api_t **) refptr1[RC_MEM_API_L1], NULL, GET_NUM_MEMTAB); + /* Few extra steps during init */ + isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + /* Fill the memtabs used by Rate Controller */ + i4_num_memtab = irc_rate_control_num_fill_use_free_memtab( + (rate_control_api_t **) refptr1[RC_MEM_API_L1], as_itt_memtab + j, e_func_type); + /* Mapping ittiam memtabs to App. memtabs */ + isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + j += i4_num_memtab; + + /* Get the total number of memtabs used by Rate Controller */ + i4_num_memtab = irc_rate_control_num_fill_use_free_memtab( + (rate_control_api_t **) refptr1[RC_MEM_API_L2], NULL, GET_NUM_MEMTAB); + /* Few extra steps during init */ + isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + /* Fill the memtabs used by Rate Controller */ + i4_num_memtab = irc_rate_control_num_fill_use_free_memtab( + (rate_control_api_t **) refptr1[RC_MEM_API_L2], as_itt_memtab + j, e_func_type); + /* Mapping ittiam memtabs to App. 
memtabs */ + isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab); + j += i4_num_memtab; + + return j; /* Total MemTabs Needed by Rate Control Module */ +} diff --git a/encoder/svc/isvce_rc_mem_interface.h b/encoder/svc/isvce_rc_mem_interface.h new file mode 100644 index 0000000..ff7a475 --- /dev/null +++ b/encoder/svc/isvce_rc_mem_interface.h @@ -0,0 +1,77 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file +* isvce_rc_mem_interface.h +* +* @brief +* This file contains function declaration and structures for rate control +* memtabs +* +* @author +* ittiam +* +* @remarks +* The rate control library is a global library across various codecs. It +* anticipates certain structures definitions. Those definitions are to be +* imported from global workspace. Instead of that, the structures needed for +* rc library are copied in to this file and exported to rc library. If the +* structures / enums / ... in the global workspace change, this file also needs +* to be modified accordingly. 
#ifndef _ISVCE_RC_MEM_INTERFACE_H_
#define _ISVCE_RC_MEM_INTERFACE_H_

#include "ih264e_rc_mem_interface.h"

/**
 ***************************************************************************
 *   Enum to hold mem records in RC
 *
 *   Each enumerator names one rate control handle slot. RC_MEM_CNT is the
 *   number of slots and must stay the last enumerator.
 ****************************************************************************
 */
typedef enum RC_MEM_TYPES_T
{
    /* frame time module */
    RC_MEM_FRAME_TIME,

    /* time stamp module */
    RC_MEM_TIME_STAMP,

    /* frame rate module */
    RC_MEM_FRAME_RATE,

    /* rate control api instances, one slot each for L0, L1 and L2 */
    RC_MEM_API_L0,

    RC_MEM_API_L1,

    RC_MEM_API_L2,

    /* number of slots above */
    RC_MEM_CNT

    /*
     * Do not add anything below
     */
} RC_MEM_TYPES_T;

/*
 * Get/init memtabs for rate control.
 * pv_rate_control : pointer to rate control context (handle)
 * ps_mem          : pointer to encoder lib mem records
 * e_func_type     : selects between filling and initialising memory records
 * returns         : WORD32 count of mem records (per the definition's doc:
 *                   "total number of mem records")
 */
extern WORD32 isvce_get_rate_control_mem_tab(void *pv_rate_control, iv_mem_rec_t *ps_mem,
                                             ITT_FUNC_TYPE_E e_func_type);

#endif
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_rc_utils.c +* +* @brief +* Contains get gpp function required by the SVC encoder +* +* @author +* ittiam +* +* @par List of Functions: +* - isvce_get_gpp() +* - isvce_rc_utils_init() +* - isvce_get_rc_utils_data_size() +* - isvce_compute_gpp() +* - isvce_get_gpp_function_selector() +* +* @remarks +* None +* +******************************************************************************* +*/ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "isvc_structs.h" +#include "isvce_rc_utils.h" +#include "isvce_rc_utils_private_defs.h" + +/** +******************************************************************************* +* +* @brief +* get gpp function +* +* @par Description: +* computes gradient per pixel value for a given frame +* +* @param[in] ps_input_buf +* pointer to yuv buffer properties +* +* @returns +* calculated gpp value +* +* @remarks +* none +* +******************************************************************************* +*/ + +static DOUBLE isvce_get_gpp(yuv_buf_props_t *ps_input_buf) +{ + UWORD32 i, j; + + DOUBLE d_gpp_y = 0; + DOUBLE d_gpp_u = 0; + DOUBLE d_gpp_v = 0; + + DOUBLE d_gpp = 0; + + UWORD32 u4_width = ps_input_buf->u4_width; + UWORD32 u4_height = ps_input_buf->u4_height; + + UWORD8 *pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[0].pv_data; + WORD32 i4_input_stride = ps_input_buf->as_component_bufs[0].i4_data_stride; + + for(i = 0; i < u4_height - 1; i++) + { + for(j = 0; j < u4_width - 1; j++) + { + UWORD8 u1_cur_pix = pu1_input_buf[j]; + UWORD8 u1_bot_pix = pu1_input_buf[i4_input_stride + j]; + UWORD8 u1_right_pix = pu1_input_buf[j + 1]; + + d_gpp_y += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix)); + } + pu1_input_buf += i4_input_stride; + } + + pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[1].pv_data; + i4_input_stride = 
ps_input_buf->as_component_bufs[1].i4_data_stride; + + for(i = 0; i < (u4_height >> 1) - 1; i++) + { + for(j = 0; j < u4_width - 2; j += 2) + { + UWORD8 u1_cur_pix = pu1_input_buf[j]; + UWORD8 u1_bot_pix = pu1_input_buf[i4_input_stride + j]; + UWORD8 u1_right_pix = pu1_input_buf[j + 2]; + + d_gpp_u += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix)); + + u1_cur_pix = pu1_input_buf[j + 1]; + u1_bot_pix = pu1_input_buf[i4_input_stride + j + 1]; + u1_right_pix = pu1_input_buf[j + 2 + 1]; + + d_gpp_v += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix)); + } + pu1_input_buf += i4_input_stride; + } + + d_gpp_y /= (u4_width * u4_height); + d_gpp_u /= ((u4_width >> 1) * (u4_height >> 1)); + d_gpp_v /= ((u4_width >> 1) * (u4_height >> 1)); + + d_gpp = (DOUBLE) ((4 * d_gpp_y) + d_gpp_u + d_gpp_v) / 6; + + return d_gpp; +} + +/** +******************************************************************************* +* +* @brief +* gets the memory size required for compute gpp +* +* @par Description: +* returns the memory required by the rc utils context and state structs +* for allocation. 
+* +* @returns +* +* @remarks +* +* +******************************************************************************* +*/ + +UWORD32 isvce_get_rc_utils_data_size() { return sizeof(svc_rc_utils_state_t); } + +/** +******************************************************************************* +* +* @brief +* compute gpp process +* +* @par Description: +* calls the function to compute gpp +* +* @param[in] ps_svc_rc_utils_ctxt +* pointer to svc rc utils context +* +* @param[in] ps_input_buf +* pointer to yuv buffer properties +* +* @returns +* calculated gpp value +* +* @remarks +* none +* +******************************************************************************* +*/ + +DOUBLE isvce_compute_gpp(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, yuv_buf_props_t *ps_input_buf) +{ + svc_rc_utils_state_t *ps_rc_utils_state = + (svc_rc_utils_state_t *) ps_svc_rc_utils_ctxt->pv_rc_utils_state; + + return ps_rc_utils_state->pf_get_gpp(ps_input_buf); +} + +/** +******************************************************************************* +* +* @brief +* selects which function to call for get gpp based on e_arch +* +* @par Description: +* +* @param[in] ps_rc_utils_state +* pointer to svc rc utils state +* +* @param[in] e_arch +* architecure type +* +* @returns +* +* @remarks +* +******************************************************************************* +*/ + +static void isvce_get_gpp_function_selector(svc_rc_utils_state_t *ps_rc_utils_state, + IV_ARCH_T e_arch) +{ + switch(e_arch) + { +#if defined(X86) + case ARCH_X86_SSE42: + { + ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_sse42; + + break; + } +#elif defined(ARMV8) + case ARCH_ARM_A53: + case ARCH_ARM_A57: + case ARCH_ARM_V8_NEON: + { + ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_neon; + + break; + } +#elif !defined(DISABLE_NEON) + case ARCH_ARM_A9Q: + case ARCH_ARM_A9A: + case ARCH_ARM_A9: + case ARCH_ARM_A7: + case ARCH_ARM_A5: + case ARCH_ARM_A15: + { + ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_neon; + + 
break; + } +#endif + default: + { + ps_rc_utils_state->pf_get_gpp = isvce_get_gpp; + + break; + } + } +} + +/** +******************************************************************************* +* +* @brief +* initializes the rc utils context +* +* @par Description: +* initializes the rc utils context +* +* @param[in] ps_svc_rc_utils_ctxt +* pointer to svc rc utils context +* +* @param[in] ps_mem_rec +* pointer to memory allocated to compute gpp process +* +* @param[in] e_arch +* architecure type +* +* @returns +* +* @remarks +* none +* +******************************************************************************* +*/ + +void isvce_rc_utils_init(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, iv_mem_rec_t *ps_mem_rec, + IV_ARCH_T e_arch) +{ + svc_rc_utils_state_t *ps_rc_utils_state; + + UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base; + + ps_rc_utils_state = (svc_rc_utils_state_t *) pu1_buf; + + ps_svc_rc_utils_ctxt->pv_rc_utils_state = ps_rc_utils_state; + + isvce_get_gpp_function_selector(ps_rc_utils_state, e_arch); +} diff --git a/encoder/svc/isvce_rc_utils.h b/encoder/svc/isvce_rc_utils.h new file mode 100644 index 0000000..f2f6451 --- /dev/null +++ b/encoder/svc/isvce_rc_utils.h @@ -0,0 +1,134 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_rc_utils.h +* +* @brief +* Contains get gpp function required by the SVC encoder +* +* @author +* ittiam +* +* @par List of Functions: +* - isvce_rc_utils_init() +* - isvce_get_rc_utils_data_size() +* - isvce_compute_gpp() +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_RC_UTILS_H_ +#define _ISVCE_RC_UTILS_H_ + +#include "ih264_typedefs.h" +#include "isvc_structs.h" + +typedef struct +{ + /** + * pointer to the state of rc utils + */ + void *pv_rc_utils_state; + +} svc_rc_utils_ctxt_t; + +/** +******************************************************************************* +* +* @brief +* initializes the rc utils context +* +* @par Description: +* initializes the rc utils context +* +* @param[in] ps_svc_rc_utils_ctxt +* pointer to svc rc utils context +* +* @param[in] ps_mem_rec +* pointer to memory allocated to compute gpp process +* +* @param[in] e_arch +* architecure type +* +* @returns +* +* @remarks +* none +* +******************************************************************************* +*/ + +extern void isvce_rc_utils_init(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, iv_mem_rec_t *ps_mem_rec, + IV_ARCH_T e_arch); + +/** +******************************************************************************* +* +* @brief +* gets the memory size required for compute gpp +* +* @par Description: +* returns the memory required by the rc utils context and state structs +* for allocation. 
+* +* @returns +* +* @remarks +* +* +******************************************************************************* +*/ + +extern UWORD32 isvce_get_rc_utils_data_size(); + +/** +******************************************************************************* +* +* @brief +* compute gpp process +* +* @par Description: +* calls the function to compute gpp +* +* @param[in] ps_svc_rc_utils_ctxt +* pointer to svc rc utils context +* +* @param[in] ps_input_buf +* pointer to yuv buffer properties +* +* @returns +* calculated gpp value +* +* @remarks +* none +* +******************************************************************************* +*/ + +extern DOUBLE isvce_compute_gpp(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, + yuv_buf_props_t *ps_input_buf); + +#endif diff --git a/encoder/svc/isvce_rc_utils_private_defs.h b/encoder/svc/isvce_rc_utils_private_defs.h new file mode 100644 index 0000000..cf2d7eb --- /dev/null +++ b/encoder/svc/isvce_rc_utils_private_defs.h @@ -0,0 +1,52 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +#ifndef _ISVCE_RC_UTILS_PRIVATE_DEFS_H_ +#define _ISVCE_RC_UTILS_PRIVATE_DEFS_H_ + +#include "ih264_typedefs.h" +#include "isvc_structs.h" +#include "isvce_rc_utils.h" + +/* Macros */ +/* NOTE(review): presumably the luma weight and the total weight applied when the + * per-plane gpp values are combined - confirm against the isvce_get_gpp implementations */ +#define WT_LUMA_GPP 4 + +#define WT_TOTAL_GPP 6 + +/* Typedefs */ +/* Signature shared by the leaf level gpp routines: takes the input yuv buffer + * properties and returns a DOUBLE */ +typedef DOUBLE FT_GET_GPP(yuv_buf_props_t *ps_input_buf); + +/* Structs */ +/* Private rc utils state; presumably what svc_rc_utils_ctxt_t.pv_rc_utils_state + * points to - TODO confirm in isvce_rc_utils.c */ +typedef struct +{ + /** + * function pointer to the leaf level function for get gpp + */ + FT_GET_GPP *pf_get_gpp; + +} svc_rc_utils_state_t; + +/* SSE42 Declarations */ +extern FT_GET_GPP isvce_get_gpp_sse42; + +/* NEON Declarations */ +extern FT_GET_GPP isvce_get_gpp_neon; + +#endif diff --git a/encoder/svc/isvce_res_pred_private_defs.h b/encoder/svc/isvce_res_pred_private_defs.h new file mode 100644 index 0000000..768be5b --- /dev/null +++ b/encoder/svc/isvce_res_pred_private_defs.h @@ -0,0 +1,124 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_res_pred_private_defs.h +* +* @brief +* Contains datatype and macro definitions used exclusively in +* residual prediction +* +******************************************************************************* +*/ + +#ifndef _ISVCE_RES_PRED_PRIVATE_DEFS_H_ +#define _ISVCE_RES_PRED_PRIVATE_DEFS_H_ + +#include "ih264_typedefs.h" +#include "isvc_defs.h" +#include "isvc_structs.h" + +/* Upper bounds on the reference array dimensions: one MB plus 6 samples each way */ +#define REF_ARRAY_MAX_WIDTH (MB_SIZE + 6) + +#define REF_ARRAY_MAX_HEIGHT (MB_SIZE + 6) + +/* Signature of the SAD routines that take a source, a prediction and a residual + * buffer together with the MB width and height, and return a UWORD32 */ +typedef UWORD32 FT_GET_SAD_WITH_RES_PRED(buffer_container_t *ps_src, buffer_container_t *ps_pred, + buffer_container_t *ps_res, UWORD32 u4_mb_wd, + UWORD32 u4_mb_ht); + +/* Signature of the residual upsamplers: reference positions and phases, input, + * output and scratch buffers, then the reference nnz flags and transform size */ +typedef void FT_RESIDUAL_SAMPLER(coordinates_t *ps_ref_array_positions, + coordinates_t *ps_ref_array_phases, buffer_container_t *ps_inp, + buffer_container_t *ps_out, buffer_container_t *ps_scratch, + UWORD32 u4_ref_nnz, UWORD8 u1_ref_tx_size); + +/* Structs */ +/* Offsets, etc used for residual upsampling and interpolation */ +/* Derived as per 'G.8.6.3.2', and 'G.8.6.3.3' for all MB's once during init */ +typedef struct res_pred_mb_state_t +{ + /* integer reference layer position of the MB's first sample */ + coordinates_t s_offsets; + + /* extent of the reference array needed by this MB */ + coordinates_t s_ref_array_dims; + + /* per-sample reference positions relative to s_offsets; NULL when the spatial ratio is not 1.5 */ + coordinates_t *ps_ref_array_positions; + + /* interpolation phases, masked to the range 0..15 */ + coordinates_t *ps_ref_array_phases; +} res_pred_mb_state_t; + +/* Per spatial layer state: resampler properties and per-MB state for luma and chroma */ +typedef struct res_pred_layer_state_t +{ + layer_resampler_props_t *ps_luma_props; + + layer_resampler_props_t *ps_chroma_props; + + res_pred_mb_state_t *ps_luma_mb_states; + + res_pred_mb_state_t *ps_chroma_mb_states; + + /* per-MB mode map; init points it one row and one element into a buffer + * whose stride is the MB count per row plus 2 */ + WORD8 *pi1_mb_mode; + + WORD32 i4_mb_mode_stride; + +} res_pred_layer_state_t; + +/* Scratch memory handed to the residual samplers */ +typedef struct res_pred_mem_store_t +{ + buffer_container_t s_scratch; + +} res_pred_mem_store_t; + +/* Top level residual prediction state, one per process context */ +typedef struct res_pred_state_t +{ + /* Array of size numSpatialLayers */ + res_pred_layer_state_t *ps_layer_state; + + res_pred_mem_store_t s_mem_store; + + /* one sampler per component, indexed with Y, U and V */ + FT_RESIDUAL_SAMPLER
*apf_residual_samplers[NUM_COMPONENTS]; + + FT_GET_SAD_WITH_RES_PRED *pf_get_sad_with_residual_pred; + + UWORD8 *pu1_ref_x_ptr_incr; /*!< buffer to store the reference + array ptr increments for + operand 2 of interpolation + */ + UWORD8 *pu1_ref_y_ptr_incr; /*!< buffer to store the reference + array ptr increments for + operand 2 of interpolation + */ + +} res_pred_state_t; + +/* C declarations */ +extern FT_RESIDUAL_SAMPLER isvce_luma_residual_sampler_2x; +extern FT_RESIDUAL_SAMPLER isvce_chroma_residual_sampler_2x; +extern FT_GET_SAD_WITH_RES_PRED isvce_get_sad_with_residual_pred; + +/* SSE42 declarations */ +/* only the luma sampler and the SAD routine are declared here for SSE42 */ +extern FT_RESIDUAL_SAMPLER isvce_luma_residual_sampler_2x_sse42; +extern FT_GET_SAD_WITH_RES_PRED isvce_get_sad_with_residual_pred_sse42; + +/* NEON declarations */ +/* only the luma sampler and the SAD routine are declared here for NEON */ +extern FT_RESIDUAL_SAMPLER isvce_luma_residual_sampler_2x_neon; +extern FT_GET_SAD_WITH_RES_PRED isvce_get_sad_with_residual_pred_neon; + +#endif diff --git a/encoder/svc/isvce_residual_pred.c b/encoder/svc/isvce_residual_pred.c new file mode 100644 index 0000000..91aeb92 --- /dev/null +++ b/encoder/svc/isvce_residual_pred.c @@ -0,0 +1,1950 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_residual_pred.c +* +* @brief +* Contains functions used for SVC residual prediction +* +******************************************************************************* +*/ +/* NOTE(review): the two bare #include directives below carry no header name in this view; the names were presumably lost during extraction - confirm against the original file */ +#include +#include + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "isvc_macros.h" +#include "ih264_debug.h" +#include "isvc_defs.h" +#include "isvc_structs.h" +#include "isvce_defs.h" +#include "isvce_structs.h" +#include "isvce_res_pred_private_defs.h" +#include "isvce_residual_pred.h" +#include "isvce_utils.h" +#include "isvc_defs.h" + +/** +******************************************************************************* +* +* @brief +* 2x residual upsampler for one chroma component (C version) +* +* @par Description: +* Two pass linear interpolation. The horizontal pass runs over BLK_SIZE +* rows of WORD16 input, reads every second element of a row (offsets 0, 2, +* 4 and 6) and writes 8 WORD32 intermediates per row to the scratch buffer. +* The vertical pass runs over BLK8x8SIZE columns of the scratch buffer and +* writes 8 rounded WORD16 outputs per column, advancing the output pointer +* by 2 elements between columns. +* NOTE(review): the step of 2 on input and output presumably reflects an +* interleaved chroma layout - confirm against the callers. +* +* @param[in] ps_ref_array_positions +* unused +* +* @param[in] ps_ref_array_phases +* phases out of 16; entries [0] and [1] give the horizontal phases +* (i4_abscissa), entries [0] and [2] the vertical phases (i4_ordinate) +* +* @param[in] ps_inp +* reference layer residual (WORD16); the first row and the first two +* elements of every row are skipped +* +* @param[out] ps_out +* upsampled residual (WORD16) +* +* @param[in] ps_scratch +* scratch buffer, used as a WORD32 array between the two passes +* +* @param[in] u4_ref_nnz +* unused +* +* @param[in] u1_ref_tx_size +* unused +* +******************************************************************************* +*/ +void isvce_chroma_residual_sampler_2x(coordinates_t *ps_ref_array_positions, + coordinates_t *ps_ref_array_phases, + buffer_container_t *ps_inp, buffer_container_t *ps_out, + buffer_container_t *ps_scratch, UWORD32 u4_ref_nnz, + UWORD8 u1_ref_tx_size) +{ + WORD32 i4_i; + WORD16 *pi2_ref_data_byte; + WORD32 *pi4_ref_array; + WORD32 i4_phase1, i4_phase2; + + WORD16 *pi2_inp_data = ps_inp->pv_data; + WORD16 *pi2_out_res = ps_out->pv_data; + WORD32 i4_inp_data_stride = ps_inp->i4_data_stride; + WORD32 i4_out_res_stride = ps_out->i4_data_stride; + + UNUSED(u4_ref_nnz); + + UNUSED(ps_ref_array_positions); + UNUSED(u1_ref_tx_size); + + /* For 2x scaling, offsets always point to TL pixel outside MB */ + /* Hence, refTransBlkIdc will be different and since phase */ + /* for first refArray pos for horiz filtering samples > 8, */ + /* first row and first column from the refArray is never used */ + pi2_inp_data += 2 + i4_inp_data_stride; + + pi2_ref_data_byte = pi2_inp_data; + + i4_phase1 = ps_ref_array_phases[0].i4_abscissa; + i4_phase2 = ps_ref_array_phases[1].i4_abscissa; + + ASSERT(i4_phase1 >= 8); + + /* ----------- Horizontal Interpolation ---------------- */ + pi4_ref_array = (WORD32 *) ps_scratch->pv_data; + + for(i4_i = 0; i4_i < BLK_SIZE; i4_i++) + { + WORD16 i2_coeff1, i2_coeff2; + + i2_coeff1 = (WORD16) (pi2_ref_data_byte[0]); + + /* populate the first inter sample */ + *pi4_ref_array++
= i2_coeff1 << 4; + + { + /* unroll count 1 */ + i2_coeff2 = (WORD16) (pi2_ref_data_byte[2]); + + /* populate 2 samples based on current coeffs */ + *pi4_ref_array++ = ((16 - i4_phase2) * i2_coeff1 + i4_phase2 * i2_coeff2); + + /* unroll count 2 */ + *pi4_ref_array++ = ((16 - i4_phase1) * i2_coeff1 + i4_phase1 * i2_coeff2); + + /* unroll count 3 */ + i2_coeff1 = (WORD16) (pi2_ref_data_byte[4]); + + /* populate 2 samples based on current coeffs */ + *pi4_ref_array++ = ((16 - i4_phase2) * i2_coeff2 + i4_phase2 * i2_coeff1); + + /* unroll count 4 */ + *pi4_ref_array++ = ((16 - i4_phase1) * i2_coeff2 + i4_phase1 * i2_coeff1); + + /* unroll count 5 */ + i2_coeff2 = (WORD16) (pi2_ref_data_byte[6]); + + /* populate 2 samples based on current coeffs */ + *pi4_ref_array++ = ((16 - i4_phase2) * i2_coeff1 + i4_phase2 * i2_coeff2); + + /* unroll count 6 */ + *pi4_ref_array++ = ((16 - i4_phase1) * i2_coeff1 + i4_phase1 * i2_coeff2); + } + + /* populate the last inter sample */ + *pi4_ref_array++ = i2_coeff2 << 4; + + /* vertical loop updates */ + pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride); + } + + /* ----------- Vertical Interpolation ---------------- */ + pi4_ref_array = (WORD32 *) ps_scratch->pv_data; + + i4_phase1 = ps_ref_array_phases[0].i4_ordinate; + i4_phase2 = ps_ref_array_phases[2].i4_ordinate; + + for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++) + { + WORD16 *pi2_out; + WORD32 *pi4_ref_array_temp; + WORD32 i4_horz_samp_1, i4_horz_samp_2; + pi2_out = pi2_out_res; + pi4_ref_array_temp = pi4_ref_array; + + /* populate the first inter sample */ + i4_horz_samp_1 = *pi4_ref_array_temp; + pi4_ref_array_temp += BLK8x8SIZE; + *pi2_out = (i4_horz_samp_1 + 8) >> 4; + pi2_out += i4_out_res_stride; + + { + /* unroll count 1 */ + i4_horz_samp_2 = *pi4_ref_array_temp; + pi4_ref_array_temp += BLK8x8SIZE; + + /* populate 2 samples based on current coeffs */ + *pi2_out = ((16 - i4_phase2) * i4_horz_samp_1 + i4_phase2 * i4_horz_samp_2 + 128) >> 8; + pi2_out +=
i4_out_res_stride; + + /* unroll count 2 */ + *pi2_out = ((16 - i4_phase1) * i4_horz_samp_1 + i4_phase1 * i4_horz_samp_2 + 128) >> 8; + pi2_out += i4_out_res_stride; + + /* unroll count 3 */ + i4_horz_samp_1 = *pi4_ref_array_temp; + pi4_ref_array_temp += BLK8x8SIZE; + + /* populate 2 samples based on current coeffs */ + *pi2_out = ((16 - i4_phase2) * i4_horz_samp_2 + i4_phase2 * i4_horz_samp_1 + 128) >> 8; + pi2_out += i4_out_res_stride; + + /* unroll count 4 */ + *pi2_out = ((16 - i4_phase1) * i4_horz_samp_2 + i4_phase1 * i4_horz_samp_1 + 128) >> 8; + pi2_out += i4_out_res_stride; + + /* unroll count 5 */ + i4_horz_samp_2 = *pi4_ref_array_temp; + + /* populate 2 samples based on current coeffs */ + *pi2_out = ((16 - i4_phase2) * i4_horz_samp_1 + i4_phase2 * i4_horz_samp_2 + 128) >> 8; + pi2_out += i4_out_res_stride; + + /* unroll count 6 */ + *pi2_out = ((16 - i4_phase1) * i4_horz_samp_1 + i4_phase1 * i4_horz_samp_2 + 128) >> 8; + pi2_out += i4_out_res_stride; + } + + /* populate the last inter sample */ + *pi2_out = (i4_horz_samp_2 + 8) >> 4; + + /* horizontal loop updates */ + pi4_ref_array++; + pi2_out_res += 2; + } +} + +/** +******************************************************************************* +* +* @brief +* 2x residual upsampler for luma (C version) +* +* @par Description: +* Two pass interpolation with fixed 3:1 and 1:3 weights; the phase array is +* not consulted. When u1_ref_tx_size and u4_ref_nnz are both non-zero the +* whole block is handled in one go: BLK8x8SIZE input rows of 8 samples are +* expanded to 16 intermediates each, then MB_SIZE columns are expanded to +* 16 output rows. Otherwise BLK_SIZE sub-blocks are visited in turn and bit +* 0 of u4_ref_nnz decides whether a sub-block is upsampled (4 input rows of +* 4 samples to 8 rows of 8) or skipped; u4_ref_nnz is shifted right after +* every sub-block, with an extra shift of 2 after the second one. +* NOTE(review): skipped sub-blocks leave the output untouched, so the caller +* presumably clears ps_out beforehand - confirm. +* +* @param[in] ps_ref_array_positions +* unused +* +* @param[in] ps_ref_array_phases +* unused +* +* @param[in] ps_inp +* reference layer residual (WORD16); the first row and the first element +* of every row are skipped +* +* @param[out] ps_out +* upsampled residual (WORD16) +* +* @param[in] ps_scratch +* scratch buffer, used as a WORD32 array between the two passes +* +* @param[in] u4_ref_nnz +* non-zero flags of the reference layer blocks, consumed from bit 0 up +* +* @param[in] u1_ref_tx_size +* non-zero selects the whole block path (together with u4_ref_nnz != 0) +* +******************************************************************************* +*/ +void isvce_luma_residual_sampler_2x(coordinates_t *ps_ref_array_positions, + coordinates_t *ps_ref_array_phases, buffer_container_t *ps_inp, + buffer_container_t *ps_out, buffer_container_t *ps_scratch, + UWORD32 u4_ref_nnz, UWORD8 u1_ref_tx_size) +{ + WORD16 *pi2_inp_data = ps_inp->pv_data; + WORD16 *pi2_out_res = ps_out->pv_data; + WORD32 i4_inp_data_stride = ps_inp->i4_data_stride; + WORD32 i4_out_res_stride = ps_out->i4_data_stride; + WORD16 *pi2_refarray_buffer = ps_scratch->pv_data; + WORD32 i4_blk_ctr; + + UNUSED(ps_ref_array_positions); + UNUSED(ps_ref_array_phases); + + /* For 2x scaling, offsets always point to TL pixel outside MB */ + /* Hence, refTransBlkIdc will be different and since phase */ + /* for first refArray pos for horiz filtering samples > 8, */ + /* first row and first column from the refArray is never used */ +
pi2_inp_data += 1 + i4_inp_data_stride; + + if((u1_ref_tx_size) && (0 != u4_ref_nnz)) + { + WORD16 *pi2_ref_data_byte; + WORD32 *pi4_ref_array; + WORD32 i4_i, i4_j; + + pi2_ref_data_byte = pi2_inp_data; + + /* ----------- Horizontal Interpolation ---------------- */ + pi4_ref_array = (WORD32 *) pi2_refarray_buffer; + + for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++) + { + WORD16 i2_coeff1, i2_coeff2; + + i2_coeff1 = (WORD16) (*pi2_ref_data_byte++); + + /* populate the first inter sample */ + *pi4_ref_array++ = i2_coeff1 << 2; + + for(i4_j = 0; i4_j < 14; i4_j += 2) + { + i2_coeff2 = (WORD16) (*pi2_ref_data_byte++); + + /* populate 2 samples based on current coeffs */ + *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2)); + + *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1)); + + /* store the coeff 2 to coeff 1 */ + /* (used in next iteration) */ + i2_coeff1 = i2_coeff2; + } + + /* populate the last inter sample */ + *pi4_ref_array++ = i2_coeff1 << 2; + + /* vertical loop updates */ + pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride); + } + + /* ----------- Vertical Interpolation ---------------- */ + pi4_ref_array = (WORD32 *) pi2_refarray_buffer; + + for(i4_i = 0; i4_i < MB_SIZE; i4_i++) + { + WORD32 *pi4_ref_array_temp; + WORD16 *pi2_out; + WORD32 i4_horz_samp_1, i4_horz_samp_2; + + pi4_ref_array_temp = pi4_ref_array; + pi2_out = pi2_out_res; + i4_horz_samp_1 = *pi4_ref_array_temp; + + /* populate the first inter sample */ + *pi2_out = (i4_horz_samp_1 + 2) >> 2; + pi2_out += i4_out_res_stride; + + for(i4_j = 0; i4_j < 14; i4_j += 2) + { + pi4_ref_array_temp += MB_SIZE; + i4_horz_samp_2 = *pi4_ref_array_temp; + + /* populate 2 samples based on current coeffs */ + *pi2_out = ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4; + pi2_out += i4_out_res_stride; + + *pi2_out = ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4; + pi2_out += i4_out_res_stride; + + /* store the coeff 2 to
coeff 1 */ + /* (used in next iteration) */ + i4_horz_samp_1 = i4_horz_samp_2; + } + + /* populate the last inter sample */ + *pi2_out = (i4_horz_samp_1 + 2) >> 2; + + /* horizontal loop updates */ + pi4_ref_array++; + pi2_out_res++; + } + } + else + { + /* ----------------------------------------------------------------- */ + /* LOOP over number of blocks */ + /* ----------------------------------------------------------------- */ + for(i4_blk_ctr = 0; i4_blk_ctr < BLK_SIZE; i4_blk_ctr++) + { + WORD16 *pi2_ref_data_byte; + WORD32 *pi4_ref_array; + WORD32 i4_i; + + /* if reference layer is not coded then no processing */ + if(0 != (u4_ref_nnz & 0x1)) + { + pi2_ref_data_byte = pi2_inp_data; + + /* ----------- Horizontal Interpolation ---------------- */ + pi4_ref_array = (WORD32 *) pi2_refarray_buffer; + + for(i4_i = 0; i4_i < BLK_SIZE; i4_i++) + { + WORD16 i2_coeff1, i2_coeff2; + + i2_coeff1 = (WORD16) (*pi2_ref_data_byte++); + + /* populate the first inter sample */ + *pi4_ref_array++ = i2_coeff1 << 2; + + { + i2_coeff2 = (WORD16) (*pi2_ref_data_byte++); + + /* populate 2 samples based on current coeffs */ + *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2)); + + *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1)); + + i2_coeff1 = (WORD16) (*pi2_ref_data_byte++); + + /* populate 2 samples based on current coeffs */ + *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1)); + + *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2)); + + i2_coeff2 = (WORD16) (*pi2_ref_data_byte++); + + /* populate 2 samples based on current coeffs */ + *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2)); + + *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1)); + } + + /* populate the last inter sample */ + *pi4_ref_array++ = i2_coeff2 << 2; + + /* vertical loop updates */ + pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride); + } + + /* ----------- Vertical Interpolation ----------------
*/ + pi4_ref_array = (WORD32 *) pi2_refarray_buffer; + + for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++) + { + WORD32 *pi4_ref_array_temp; + WORD16 *pi2_out; + WORD32 i4_horz_samp_1, i4_horz_samp_2; + + pi4_ref_array_temp = pi4_ref_array; + pi2_out = pi2_out_res; + i4_horz_samp_1 = *pi4_ref_array_temp; + + /* populate the first inter sample */ + *pi2_out = (i4_horz_samp_1 + 2) >> 2; + pi2_out += i4_out_res_stride; + + { + /* unroll loop count 1 */ + pi4_ref_array_temp += BLK8x8SIZE; + i4_horz_samp_2 = *pi4_ref_array_temp; + + /* populate 2 samples based on current coeffs */ + *pi2_out = + ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4; + pi2_out += i4_out_res_stride; + + *pi2_out = + ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4; + pi2_out += i4_out_res_stride; + + /* unroll loop count 2 */ + pi4_ref_array_temp += BLK8x8SIZE; + i4_horz_samp_1 = *pi4_ref_array_temp; + + /* populate 2 samples based on current coeffs */ + *pi2_out = + ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4; + pi2_out += i4_out_res_stride; + + *pi2_out = + ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4; + pi2_out += i4_out_res_stride; + + /* unroll loop count 3 */ + pi4_ref_array_temp += BLK8x8SIZE; + i4_horz_samp_2 = *pi4_ref_array_temp; + + /* populate 2 samples based on current coeffs */ + *pi2_out = + ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4; + pi2_out += i4_out_res_stride; + + *pi2_out = + ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4; + pi2_out += i4_out_res_stride; + } + + /* populate the last inter sample */ + *pi2_out = (i4_horz_samp_2 + 2) >> 2; + + /* horizontal loop updates */ + pi4_ref_array++; + pi2_out_res++; + } + } + else + { + pi2_out_res += BLK8x8SIZE; + } + + if(1 == i4_blk_ctr) + { + pi2_inp_data -= BLK_SIZE; + pi2_inp_data += (i4_inp_data_stride * BLK_SIZE); + pi2_out_res -= MB_SIZE; + pi2_out_res += (i4_out_res_stride *
BLK8x8SIZE); + u4_ref_nnz >>= 2; + } + else + { + pi2_inp_data += BLK_SIZE; + } + + u4_ref_nnz >>= 1; + } + } +} + +/** +******************************************************************************* +* +* @brief +* Returns size of buffers for storing residual pred ctxt +* +* @par Description: +* With more than one spatial layer the total covers, per process context, +* the ctxt and state structs, the pred buffers, the scratch buffer, the two +* ref ptr increment buffers and the layer state array, plus the per-MB +* states (with position and phase arrays) of every layer above the base and +* one mb mode map per layer. With a single layer only the yuv buffer structs +* and the pred buffers are counted. The layer dimensions are rounded exactly +* as in isvce_svc_res_pred_ctxt_init() so that both agree on the MB counts. +* +* @param[in] u1_num_spatial_layers +* Num Spatial Layers +* +* @param[in] d_spatial_res_ratio +* Resolution Ratio b/w spatial layers +* +* @param[in] u4_wd +* Input Width +* +* @param[in] u4_ht +* Input Height +* +* @returns Size of buffers +* +******************************************************************************* +*/ +UWORD32 isvce_get_svc_res_pred_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, + UWORD32 u4_wd, UWORD32 u4_ht) +{ + UWORD32 u4_size = 0; + + if(u1_num_spatial_layers > 1) + { + WORD32 i; + + u4_size += MAX_PROCESS_CTXT * sizeof(svc_res_pred_ctxt_t); + u4_size += MAX_PROCESS_CTXT * sizeof(res_pred_state_t); + + /* Mem for storing pred */ + u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(WORD16); + u4_size += MAX_PROCESS_CTXT * MB_SIZE * (MB_SIZE / 2) * sizeof(WORD16); + + /* Mem for storing intermediates */ + u4_size += MAX_PROCESS_CTXT * REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16); + + /* Mem for pu1_ref_x_ptr_incr and pu1_ref_y_ptr_incr*/ + u4_size += + 2 * MAX_PROCESS_CTXT * REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(UWORD8); + + u4_size += MAX_PROCESS_CTXT * u1_num_spatial_layers * sizeof(res_pred_layer_state_t); + + for(i = u1_num_spatial_layers - 1; i >= 1; i--) + { + /* The quotient has to stay a DOUBLE until 0.99 is added; casting it */ + /* to WORD32 first floors the width and can yield fewer MBs than */ + /* isvce_svc_res_pred_ctxt_init() lays out in the buffer */ + WORD32 i4_layer_luma_wd = + ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; + WORD32 i4_layer_luma_mbs = (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE); + WORD32 i4_layer_u_wd = i4_layer_luma_wd / 2.0 + 0.99; + WORD32 i4_layer_u_ht = i4_layer_luma_ht / 2.0 + 0.99; + WORD32
i4_layer_u_mbs = + (i4_layer_u_wd / (MB_SIZE / 2)) * (i4_layer_u_ht / (MB_SIZE / 2)); + + /* ps_luma_mb_states */ + { + u4_size += i4_layer_luma_mbs * sizeof(res_pred_mb_state_t); + + /* ps_ref_array_positions */ + u4_size += + ((1.5 == d_spatial_res_ratio) ? (i4_layer_luma_mbs * MB_SIZE * MB_SIZE) : 0) * + sizeof(coordinates_t); + + /* ps_ref_array_phases */ + u4_size += ((1.5 == d_spatial_res_ratio) ? (i4_layer_luma_mbs * 5) : 0) * + sizeof(coordinates_t); + } + + /* ps_chroma_mb_states */ + { + u4_size += i4_layer_u_mbs * sizeof(res_pred_mb_state_t); + + /* ps_ref_array_positions */ + u4_size += + ((1.5 == d_spatial_res_ratio) ? (i4_layer_u_mbs * (MB_SIZE / 2) * (MB_SIZE / 2)) + : 0) * + sizeof(coordinates_t); + + /* ps_ref_array_phases */ + /* for ratios other than 1.5 a single set of 3 phases is shared by all chroma MBs */ + u4_size += ((1.5 == d_spatial_res_ratio) ? (i4_layer_u_mbs * 5) : 3) * + sizeof(coordinates_t); + } + } + + for(i = u1_num_spatial_layers - 1; i >= 0; i--) + { + /* same rounding as above: add 0.99 before the conversion to WORD32 */ + WORD32 i4_layer_luma_wd = + ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; + /* the mb mode map carries one extra MB on every side */ + WORD32 i4_layer_luma_mbs = + ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2); + + /* pi1_mb_mode */ + u4_size += i4_layer_luma_mbs * sizeof(WORD8); + } + } + else + { + u4_size += MAX_PROCESS_CTXT * sizeof(yuv_buf_props_t); + + /* Mem for storing pred */ + u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(WORD16); + u4_size += MAX_PROCESS_CTXT * MB_SIZE * (MB_SIZE / 2) * sizeof(WORD16); + } + + return u4_size; +} + +/* Maps a pixel position of the current layer to the reference layer. The result */ +/* carries 4 fractional bits: callers shift it right by 4 for the integer position */ +/* and mask it with 15 for the phase. u1_dim_id 1 selects the vertical (y) */ +/* resampler properties, any other value the horizontal (x) ones. */ +static FORCEINLINE WORD32 isvce_get_scaled_pixel_pos(layer_resampler_props_t *ps_layer_props, + WORD32 i4_pixel_pos, UWORD8 u1_dim_id) +{ + if(1 == u1_dim_id) + { + return (((i4_pixel_pos - ps_layer_props->i4_offset_y) * + ((WORD64) ps_layer_props->u4_scale_y) + + ps_layer_props->i4_add_y) >> + (ps_layer_props->u4_shift_y - 4)) - + ps_layer_props->i4_delta_y; + } + else + { + return
(((i4_pixel_pos - ps_layer_props->i4_offset_x) * + ((WORD64) ps_layer_props->u4_scale_x) + + ps_layer_props->i4_add_x) >> + (ps_layer_props->u4_shift_x - 4)) - + ps_layer_props->i4_delta_x; + } +} + +/* Phase setup for the 2x case. Writes entries 0, 1 and 2 of */ +/* ps_mb_state->ps_ref_array_phases: the phases of samples (x, y) = (0, 0) and */ +/* (1, 0) of the MB at ps_mb_pos, followed by sample (0, 1). Each phase is the */ +/* scaled position minus 16 times the MB offset, masked with 15. The vertical */ +/* position is halved only when one of the frame_mbs_only flags is 0 and */ +/* u1_field_mb_flag is 0. */ +static FORCEINLINE void isvce_ref_array_pos_and_phase_init_dyadic( + layer_resampler_props_t *ps_layer_props, res_pred_mb_state_t *ps_mb_state, + coordinates_t *ps_mb_pos, UWORD8 u1_frame_mbs_only_flag, UWORD8 u1_field_mb_flag, + UWORD8 u1_ref_layer_frame_mbs_only_flag) +{ + UWORD32 i, j; + + coordinates_t *ps_ref_array_phases = ps_mb_state->ps_ref_array_phases; + + WORD32 i4_x_offset = ps_mb_state->s_offsets.i4_abscissa; + WORD32 i4_y_offset = ps_mb_state->s_offsets.i4_ordinate; + + for(i = 0; i < 2; i++) + { + WORD32 i4_y_ref16; + + WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i; + + if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) + { + i4_yc = i4_yc >> (1 - u1_field_mb_flag); + } + + i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1); + + /* two columns on the first row, one on the second: indices 0, 1 and 2 */ + for(j = 0; j < ((0 == i) ?
2 : 1); j++) + { + WORD32 i4_x_ref16; + + WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + j; + + i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0); + + ps_ref_array_phases[j + i * 2].i4_abscissa = (i4_x_ref16 - (16 * i4_x_offset)) & 15; + ps_ref_array_phases[j + i * 2].i4_ordinate = (i4_y_ref16 - (16 * i4_y_offset)) & 15; + } + } +} + +/* General position and phase setup. For every sample of the MB at ps_mb_pos */ +/* (u4_mb_ht rows of u4_mb_wd samples) the integer reference position relative to */ +/* the MB offset is stored in ps_ref_array_positions. Phases are stored only for */ +/* the first three samples of row 0 and the first three samples of column 0, */ +/* which gives 5 entries in ps_ref_array_phases, written in scan order. */ +static FORCEINLINE void isvce_ref_array_pos_and_phase_init(layer_resampler_props_t *ps_layer_props, + res_pred_mb_state_t *ps_mb_state, + coordinates_t *ps_mb_pos, + UWORD8 u1_frame_mbs_only_flag, + UWORD8 u1_field_mb_flag, + UWORD8 u1_ref_layer_frame_mbs_only_flag) +{ + UWORD32 i, j; + + coordinates_t *ps_ref_array_positions = ps_mb_state->ps_ref_array_positions; + coordinates_t *ps_ref_array_phases = ps_mb_state->ps_ref_array_phases; + + WORD32 i4_x_offset = ps_mb_state->s_offsets.i4_abscissa; + WORD32 i4_y_offset = ps_mb_state->s_offsets.i4_ordinate; + UWORD32 u4_phase_array_idx = 0; + + for(i = 0; i < ps_layer_props->u4_mb_ht; i++) + { + WORD32 i4_y_ref16; + + WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i; + + if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) + { + i4_yc = i4_yc >> (1 - u1_field_mb_flag); + } + + i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1); + + for(j = 0; j < ps_layer_props->u4_mb_wd; j++) + { + WORD32 i4_x_ref16; + + WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + j; + + i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0); + + ps_ref_array_positions[j + i * ps_layer_props->u4_mb_wd].i4_abscissa = + (i4_x_ref16 >> 4) - i4_x_offset; + ps_ref_array_positions[j + i * ps_layer_props->u4_mb_wd].i4_ordinate = + (i4_y_ref16 >> 4) - i4_y_offset; + + /* (0,0), (0,1), (0,2), (1,0) and (2,0) in (row, column) terms */ + if(((0 == i) && (j < 3)) || ((0 == j) && (i < 3))) + { + ps_ref_array_phases[u4_phase_array_idx].i4_abscissa = + (i4_x_ref16 - (16 * i4_x_offset)) & 15; + ps_ref_array_phases[u4_phase_array_idx].i4_ordinate = + (i4_y_ref16 - (16 * 
i4_y_offset)) & 15; + + u4_phase_array_idx++; + } + } + } +} + +/* Fills the per-MB state of one layer, first for luma and then for chroma. For */ +/* each MB the offset is the scaled position of its first sample shifted right by */ +/* 4, and the ref array dimension is the scaled position of its last sample */ +/* shifted right by 4, minus the offset, plus 2. Phases are derived only for the */ +/* first chroma MB when the ratio is 2, and positions plus phases for every MB */ +/* when the ratio is 1.5. Field coding is not used: the flags are constants here. */ +/* u4_wd and u4_ht are the dimensions of the layer being initialised. */ +static void isvce_res_pred_layer_state_init(res_pred_layer_state_t *ps_layer_state, + DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, + UWORD32 u4_ht, IV_COLOR_FORMAT_T e_color_format) +{ + UWORD32 i, j, k; + + const UWORD8 u1_ref_layer_field_pic_flag = 0; + const UWORD8 u1_field_pic_flag = 0; + const UWORD8 u1_frame_mbs_only_flag = 1; + const UWORD8 u1_ref_layer_frame_mbs_only_flag = 1; + const UWORD8 u1_field_mb_flag = 0; + + ASSERT((IV_YUV_420P == e_color_format) || (IV_YUV_420SP_UV == e_color_format)); + + UNUSED(e_color_format); + + /* i == 0 handles luma, i == 1 handles chroma */ + for(i = 0; i < 2; i++) + { + res_pred_mb_state_t *ps_mb_states; + layer_resampler_props_t *ps_layer_props; + + UWORD32 u4_wd_in_mbs; + UWORD32 u4_ht_in_mbs; + + UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i)); + /* NOTE(review): u4_ref_wd and u4_ref_ht are computed and halved for chroma */ + /* but are not read again in this function */ + UWORD32 u4_ref_wd = (u4_wd / d_spatial_res_ratio); + UWORD32 u4_ref_ht = (u4_ht / d_spatial_res_ratio) * (1 + u1_ref_layer_field_pic_flag); + UWORD32 u4_scaled_wd = u4_wd; + UWORD32 u4_scaled_ht = u4_ht * (1 + u1_field_pic_flag); + + ps_mb_states = + u1_is_chroma ? ps_layer_state->ps_chroma_mb_states : ps_layer_state->ps_luma_mb_states; + ps_layer_props = + u1_is_chroma ? 
ps_layer_state->ps_chroma_props : ps_layer_state->ps_luma_props; + + u4_ref_wd = u4_ref_wd >> u1_is_chroma; + u4_ref_ht = u4_ref_ht >> u1_is_chroma; + u4_scaled_wd = u4_scaled_wd >> u1_is_chroma; + u4_scaled_ht = u4_scaled_ht >> u1_is_chroma; + + u4_wd_in_mbs = u4_scaled_wd / ps_layer_props->u4_mb_wd; + u4_ht_in_mbs = u4_scaled_ht / ps_layer_props->u4_mb_ht; + + for(j = 0; j < u4_ht_in_mbs; j++) + { + WORD32 i4_y_refmin16; + WORD32 i4_y_refmax16; + WORD32 i4_y_offset; + + i4_y_refmin16 = + isvce_get_scaled_pixel_pos(ps_layer_props, j * ps_layer_props->u4_mb_ht, 1); + i4_y_refmax16 = isvce_get_scaled_pixel_pos( + ps_layer_props, j * ps_layer_props->u4_mb_ht + ps_layer_props->u4_mb_ht - 1, 1); + i4_y_offset = i4_y_refmin16 >> 4; + + for(k = 0; k < u4_wd_in_mbs; k++) + { + WORD32 i4_x_refmin16; + WORD32 i4_x_refmax16; + WORD32 i4_x_offset; + + coordinates_t s_mb_pos = {k, j}; + + i4_x_refmin16 = + isvce_get_scaled_pixel_pos(ps_layer_props, k * ps_layer_props->u4_mb_wd, 0); + i4_x_refmax16 = isvce_get_scaled_pixel_pos( + ps_layer_props, k * ps_layer_props->u4_mb_wd + ps_layer_props->u4_mb_wd - 1, 0); + i4_x_offset = i4_x_refmin16 >> 4; + + ps_mb_states[k + j * u4_wd_in_mbs].s_offsets.i4_abscissa = i4_x_offset; + ps_mb_states[k + j * u4_wd_in_mbs].s_offsets.i4_ordinate = i4_y_offset; + ps_mb_states[k + j * u4_wd_in_mbs].s_ref_array_dims.i4_abscissa = + (i4_x_refmax16 >> 4) - i4_x_offset + 2; + ps_mb_states[k + j * u4_wd_in_mbs].s_ref_array_dims.i4_ordinate = + (i4_y_refmax16 >> 4) - i4_y_offset + 2; + + /* ratio 2: phases are derived once, from the first chroma MB only */ + if((0 == k) && (0 == j) && (2 == d_spatial_res_ratio) && u1_is_chroma) + { + isvce_ref_array_pos_and_phase_init_dyadic( + ps_layer_props, &ps_mb_states[k + j * u4_wd_in_mbs], &s_mb_pos, + u1_frame_mbs_only_flag, u1_field_mb_flag, u1_ref_layer_frame_mbs_only_flag); + } + else if(1.5 == d_spatial_res_ratio) + { + isvce_ref_array_pos_and_phase_init( + ps_layer_props, &ps_mb_states[k + j * u4_wd_in_mbs], &s_mb_pos, + u1_frame_mbs_only_flag, u1_field_mb_flag, 
u1_ref_layer_frame_mbs_only_flag); + } + } + } + } +} + +/* Selects the residual sampler and SAD function pointers for e_arch. The */ +/* samplers are assigned only when the spatial ratio is exactly 2; for any other */ +/* ratio apf_residual_samplers is left untouched. Chroma (U and V) always gets */ +/* the C sampler, luma gets the SSE42 or NEON version when the build and e_arch */ +/* allow it and the C version otherwise. The SAD function is selected the same */ +/* way, regardless of the ratio. */ +void isvce_svc_residual_sampling_function_selector(res_pred_state_t *ps_res_pred_state, + DOUBLE d_spatial_res_ratio, IV_ARCH_T e_arch) +{ + if(2. == d_spatial_res_ratio) + { + ps_res_pred_state->apf_residual_samplers[U] = isvce_chroma_residual_sampler_2x; + ps_res_pred_state->apf_residual_samplers[V] = isvce_chroma_residual_sampler_2x; + + switch(e_arch) + { +#if defined(X86) + case ARCH_X86_SSE42: + { + ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_sse42; + + break; + } +#elif defined(ARMV8) + case ARCH_ARM_A53: + case ARCH_ARM_A57: + case ARCH_ARM_V8_NEON: + { + ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_neon; + + break; + } +#elif !defined(DISABLE_NEON) + case ARCH_ARM_A9Q: + case ARCH_ARM_A9A: + case ARCH_ARM_A9: + case ARCH_ARM_A7: + case ARCH_ARM_A5: + case ARCH_ARM_A15: + { + ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_neon; + + break; + } +#endif + default: + { + ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x; + + break; + } + } + } + + switch(e_arch) + { +#if defined(X86) + case ARCH_X86_SSE42: + { + ps_res_pred_state->pf_get_sad_with_residual_pred = + isvce_get_sad_with_residual_pred_sse42; + + break; + } +#elif defined(ARMV8) + case ARCH_ARM_A53: + case ARCH_ARM_A57: + case ARCH_ARM_V8_NEON: + { + ps_res_pred_state->pf_get_sad_with_residual_pred = + isvce_get_sad_with_residual_pred_neon; + + break; + } +#elif !defined(DISABLE_NEON) + case ARCH_ARM_A9Q: + case ARCH_ARM_A9A: + case ARCH_ARM_A9: + case ARCH_ARM_A7: + case ARCH_ARM_A5: + case ARCH_ARM_A15: + { + ps_res_pred_state->pf_get_sad_with_residual_pred = + isvce_get_sad_with_residual_pred_neon; + + break; + } +#endif + default: + { + ps_res_pred_state->pf_get_sad_with_residual_pred = isvce_get_sad_with_residual_pred; + + break; + } + } +} + +/** 
+******************************************************************************* +* +* @brief +* Function to initialize svc ilp buffers +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_mem_rec +* Pointer to memory allocated for input buffers +* +******************************************************************************* +*/ +void isvce_svc_res_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec) +{ + WORD32 i, j, k; + + const WORD32 i4_num_proc_ctxts = sizeof(ps_codec->as_process) / sizeof(ps_codec->as_process[0]); + UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; + DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio; + UWORD32 u4_wd = ps_codec->s_cfg.u4_wd; + UWORD32 u4_ht = ps_codec->s_cfg.u4_ht; + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + WORD64 i8_alloc_mem_size = + isvce_get_svc_res_pred_ctxt_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht); + + if(u1_num_spatial_layers > 1) + { + res_pred_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS]; + res_pred_mb_state_t *aps_chroma_mb_states[MAX_NUM_SPATIAL_LAYERS]; + + WORD8 *api1_mb_mode[MAX_NUM_SPATIAL_LAYERS]; + WORD32 ai4_mb_mode_stride[MAX_NUM_SPATIAL_LAYERS]; + + WORD32 i4_size; + + for(i = 0; i < i4_num_proc_ctxts; i++) + { + res_pred_state_t *ps_res_pred_state; + svc_res_pred_ctxt_t *ps_res_pred_ctxt; + yuv_buf_props_t *ps_mb_res_buf; + res_pred_mem_store_t *ps_mem_store; + + isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i; + + ps_res_pred_ctxt = ps_proc->ps_res_pred_ctxt = (svc_res_pred_ctxt_t *) pu1_buf; + pu1_buf += sizeof(svc_res_pred_ctxt_t); + i8_alloc_mem_size -= sizeof(svc_res_pred_ctxt_t); + + ps_res_pred_ctxt->s_res_pred_constants.pv_state = pu1_buf; + ps_res_pred_state = (res_pred_state_t *) pu1_buf; + pu1_buf += sizeof(res_pred_state_t); + i8_alloc_mem_size -= sizeof(res_pred_state_t); + + ps_res_pred_state->ps_layer_state = (res_pred_layer_state_t *) pu1_buf; + pu1_buf 
+= u1_num_spatial_layers * sizeof(ps_res_pred_state->ps_layer_state[0]); + i8_alloc_mem_size -= + u1_num_spatial_layers * sizeof(ps_res_pred_state->ps_layer_state[0]); + + i4_size = REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(UWORD8); + ps_res_pred_state->pu1_ref_x_ptr_incr = (UWORD8 *) pu1_buf; + pu1_buf += i4_size; + ps_res_pred_state->pu1_ref_y_ptr_incr = (UWORD8 *) pu1_buf; + pu1_buf += i4_size; + + ASSERT(i8_alloc_mem_size >= 0); + + if(0 == i) + { + UWORD32 au4_ref_pos_array_size[NUM_SP_COMPONENTS]; + UWORD32 au4_ref_phase_array_size[NUM_SP_COMPONENTS]; + + if(1.5 == d_spatial_res_ratio) + { + au4_ref_pos_array_size[Y] = MB_SIZE * MB_SIZE; + au4_ref_phase_array_size[Y] = 5; + au4_ref_pos_array_size[U] = (MB_SIZE / 2) * (MB_SIZE / 2); + au4_ref_phase_array_size[U] = 5; + } + else + { + au4_ref_pos_array_size[Y] = au4_ref_pos_array_size[U] = 0; + au4_ref_phase_array_size[Y] = 0; + au4_ref_phase_array_size[U] = 3; + } + + for(j = u1_num_spatial_layers - 1; j >= 1; j--) + { + res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j]; + + WORD32 i4_layer_luma_wd = + ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + + 0.99; + WORD32 i4_layer_luma_mbs = + (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE); + WORD32 i4_layer_u_wd = i4_layer_luma_wd / 2.0 + 0.99; + WORD32 i4_layer_u_ht = i4_layer_luma_ht / 2.0 + 0.99; + WORD32 i4_layer_u_mbs = + (i4_layer_u_wd / (MB_SIZE / 2)) * (i4_layer_u_ht / (MB_SIZE / 2)); + + ps_layer->ps_luma_mb_states = (res_pred_mb_state_t *) pu1_buf; + aps_luma_mb_states[j] = ps_layer->ps_luma_mb_states; + pu1_buf += i4_layer_luma_mbs * sizeof(ps_layer->ps_luma_mb_states[0]); + i8_alloc_mem_size -= + u1_num_spatial_layers * sizeof(ps_layer->ps_luma_mb_states[0]); + + ps_layer->ps_chroma_mb_states = (res_pred_mb_state_t *) pu1_buf; + aps_chroma_mb_states[j] = 
ps_layer->ps_chroma_mb_states; + pu1_buf += i4_layer_u_mbs * sizeof(ps_layer->ps_chroma_mb_states[0]); + i8_alloc_mem_size -= i4_layer_u_mbs * sizeof(ps_layer->ps_chroma_mb_states[0]); + + if(1.5 == d_spatial_res_ratio) + { + coordinates_t *ps_ref_array_pos = (coordinates_t *) pu1_buf; + coordinates_t *ps_ref_array_phases = + ps_ref_array_pos + i4_layer_luma_mbs * au4_ref_pos_array_size[Y]; + + for(k = 0; k < i4_layer_luma_mbs; k++) + { + ps_layer->ps_luma_mb_states[k].ps_ref_array_positions = + ps_ref_array_pos + k * au4_ref_pos_array_size[Y]; + ps_layer->ps_luma_mb_states[k].ps_ref_array_phases = + ps_ref_array_phases + k * au4_ref_phase_array_size[Y]; + pu1_buf += au4_ref_pos_array_size[Y] * sizeof(ps_ref_array_pos[0]); + i8_alloc_mem_size -= + au4_ref_pos_array_size[Y] * sizeof(ps_ref_array_pos[0]); + pu1_buf += au4_ref_phase_array_size[Y] * sizeof(ps_ref_array_phases[0]); + i8_alloc_mem_size -= + au4_ref_phase_array_size[Y] * sizeof(ps_ref_array_phases[0]); + } + + ps_ref_array_pos = (coordinates_t *) pu1_buf; + ps_ref_array_phases = + ps_ref_array_pos + i4_layer_u_mbs * au4_ref_pos_array_size[U]; + + for(k = 0; k < i4_layer_u_mbs; k++) + { + ps_layer->ps_chroma_mb_states[k].ps_ref_array_positions = + ps_ref_array_pos + k * au4_ref_pos_array_size[U]; + ps_layer->ps_chroma_mb_states[k].ps_ref_array_phases = + ps_ref_array_phases + k * au4_ref_phase_array_size[U]; + pu1_buf += au4_ref_pos_array_size[U] * sizeof(ps_ref_array_pos[0]); + i8_alloc_mem_size -= + au4_ref_pos_array_size[U] * sizeof(ps_ref_array_pos[0]); + pu1_buf += au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]); + i8_alloc_mem_size -= + au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]); + } + } + else + { + coordinates_t *ps_ref_array_pos = NULL; + coordinates_t *ps_ref_array_phases = NULL; + + for(k = 0; k < i4_layer_luma_mbs; k++) + { + ps_layer->ps_luma_mb_states[k].ps_ref_array_positions = + ps_ref_array_pos; + ps_layer->ps_luma_mb_states[k].ps_ref_array_phases = + 
ps_ref_array_phases; + } + + ps_ref_array_pos = NULL; + ps_ref_array_phases = (coordinates_t *) pu1_buf; + + for(k = 0; k < i4_layer_u_mbs; k++) + { + ps_layer->ps_chroma_mb_states[k].ps_ref_array_positions = + ps_ref_array_pos; + ps_layer->ps_chroma_mb_states[k].ps_ref_array_phases = + ps_ref_array_phases; + } + + pu1_buf += au4_ref_phase_array_size[U] * sizeof(ps_ref_array_pos[0]); + i8_alloc_mem_size -= + au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]); + } + + ASSERT(i8_alloc_mem_size >= 0); + /* Asserts below verify that + * 'ps_codec->s_svc_ilp_data.aps_layer_resampler_props' is initialised + */ + ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j].u4_mb_wd == + MB_SIZE); + ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j].u4_mb_wd == + (MB_SIZE / 2)); + + ps_layer->ps_luma_props = + &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j]; + ps_layer->ps_chroma_props = + &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j]; + + isvce_res_pred_layer_state_init(ps_layer, d_spatial_res_ratio, i4_layer_luma_wd, + i4_layer_luma_ht, + ps_codec->s_cfg.e_inp_color_fmt); + } + + for(j = u1_num_spatial_layers - 1; j >= 0; j--) + { + res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j]; + + WORD32 i4_layer_luma_wd = + ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + + 0.99; + WORD32 i4_layer_luma_mbs = + ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2); + + ps_layer->pi1_mb_mode = (WORD8 *) pu1_buf; + pu1_buf += i4_layer_luma_mbs * sizeof(WORD8); + memset(ps_layer->pi1_mb_mode, -1, i4_layer_luma_mbs); + + ps_layer->i4_mb_mode_stride = ai4_mb_mode_stride[j] = + (i4_layer_luma_wd / MB_SIZE) + 2; + ps_layer->pi1_mb_mode += 1 + ps_layer->i4_mb_mode_stride; + api1_mb_mode[j] = ps_layer->pi1_mb_mode; + } + } + else + { + for(j = 
u1_num_spatial_layers - 1; j >= 1; j--) + { + res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j]; + + ps_layer->ps_luma_mb_states = aps_luma_mb_states[j]; + ps_layer->ps_chroma_mb_states = aps_chroma_mb_states[j]; + + ps_layer->ps_luma_props = + &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j]; + ps_layer->ps_chroma_props = + &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j]; + } + for(j = u1_num_spatial_layers - 1; j >= 0; j--) + { + res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j]; + + ps_layer->pi1_mb_mode = api1_mb_mode[j]; + ps_layer->i4_mb_mode_stride = ai4_mb_mode_stride[j]; + } + } + + ps_mb_res_buf = &ps_res_pred_ctxt->s_res_pred_outputs.s_res_pred; + ps_mem_store = &ps_res_pred_state->s_mem_store; + ps_proc->ps_mb_res_buf = ps_mb_res_buf; + + for(j = 0; j < NUM_SP_COMPONENTS; j++) + { + buffer_container_t *ps_comp_buf = &ps_mb_res_buf->as_component_bufs[j]; + + UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) j)); + + ps_comp_buf->pv_data = pu1_buf; + ps_comp_buf->i4_data_stride = MB_SIZE; + pu1_buf += MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16); + i8_alloc_mem_size -= MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16); + } + + ps_mem_store->s_scratch.pv_data = pu1_buf; + ps_mem_store->s_scratch.i4_data_stride = REF_ARRAY_MAX_WIDTH; + pu1_buf += REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16); + i8_alloc_mem_size -= REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16); + + ASSERT(i8_alloc_mem_size >= 0); + + ps_mb_res_buf->as_component_bufs[V].pv_data = NULL; + ps_mb_res_buf->e_color_format = IV_YUV_420SP_UV; + ps_mb_res_buf->u1_bit_depth = 10; + ps_mb_res_buf->u4_width = MB_SIZE; + ps_mb_res_buf->u4_height = MB_SIZE; + + isvce_svc_residual_sampling_function_selector(ps_res_pred_state, d_spatial_res_ratio, + ps_codec->s_cfg.e_arch); + } + } + else + { + for(i = 0; i < i4_num_proc_ctxts; i++) + { + isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i; + + 
ps_proc->ps_res_pred_ctxt = NULL; + + ps_proc->ps_mb_res_buf = (yuv_buf_props_t *) pu1_buf; + pu1_buf += sizeof(yuv_buf_props_t); + i8_alloc_mem_size -= sizeof(yuv_buf_props_t); + + for(j = 0; j < NUM_SP_COMPONENTS; j++) + { + buffer_container_t *ps_comp_buf = &ps_proc->ps_mb_res_buf->as_component_bufs[j]; + + UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) j)); + + ps_comp_buf->pv_data = pu1_buf; + ps_comp_buf->i4_data_stride = MB_SIZE; + pu1_buf += MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16); + i8_alloc_mem_size -= MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16); + } + + ASSERT(i8_alloc_mem_size >= 0); + } + } +} + +void isvce_get_mb_residual_pred(svc_res_pred_ctxt_t *ps_res_pred_ctxt) +{ + buffer_container_t s_inp; + buffer_container_t s_out; + coordinates_t s_frame_dims; + coordinates_t s_frame_dims_in_mbs; + coordinates_t s_ref_array_offsets; + svc_layer_data_t *ps_ref_layer_data; + res_pred_layer_state_t *ps_layer_state; + yuv_buf_props_t *ps_ref_residual_buf; + res_pred_mb_state_t *ps_luma_mb_state; + res_pred_mb_state_t *ps_chroma_mb_state; + isvce_mb_info_t *ps_ref_mb; + + WORD32 i; + + res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants; + res_pred_variables_t *ps_res_pred_variables = &ps_res_pred_ctxt->s_res_pred_variables; + res_pred_outputs_t *ps_res_pred_outputs = &ps_res_pred_ctxt->s_res_pred_outputs; + res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state; + res_pred_mem_store_t *ps_mem_store = &ps_res_pred_state->s_mem_store; + svc_ilp_data_t *ps_svc_ilp_data = ps_res_pred_variables->ps_svc_ilp_data; + coordinates_t *ps_mb_pos = &ps_res_pred_variables->s_mb_pos; + + UWORD8 u1_spatial_layer_id = ps_res_pred_variables->u1_spatial_layer_id; + + ASSERT(u1_spatial_layer_id > 0); + + s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width; + s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height; + 
s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE; + s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE; + + ps_ref_layer_data = + &ps_svc_ilp_data->ps_svc_au_data->ps_svc_layer_data[u1_spatial_layer_id - 1]; + ps_layer_state = &ps_res_pred_state->ps_layer_state[u1_spatial_layer_id]; + ps_ref_residual_buf = &ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1]; + ps_luma_mb_state = ps_layer_state->ps_luma_mb_states + ps_mb_pos->i4_abscissa + + ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa; + ps_chroma_mb_state = ps_layer_state->ps_chroma_mb_states + ps_mb_pos->i4_abscissa + + ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa; + + for(i = 0; i < NUM_COMPONENTS; i++) + { + res_pred_mb_state_t *ps_mb_state; + layer_resampler_props_t *ps_layer_props; + + UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i)); + + ps_mb_state = u1_is_chroma ? ps_chroma_mb_state : ps_luma_mb_state; + ps_layer_props = + u1_is_chroma ? ps_layer_state->ps_chroma_props : ps_layer_state->ps_luma_props; + + /* Presence of appropriate padding is assumed */ + s_ref_array_offsets = ps_mb_state->s_offsets; + + s_inp = ps_ref_residual_buf->as_component_bufs[u1_is_chroma ? UV : Y]; + s_inp.pv_data = ((WORD16 *) s_inp.pv_data) + (V == ((COMPONENT_TYPE) i)) + + (s_ref_array_offsets.i4_abscissa << u1_is_chroma) + + s_ref_array_offsets.i4_ordinate * s_inp.i4_data_stride; + + s_out = ps_res_pred_outputs->s_res_pred.as_component_bufs[u1_is_chroma ? 
UV : Y]; + s_out.pv_data = ((WORD16 *) s_out.pv_data) + (V == ((COMPONENT_TYPE) i)); + + ps_ref_mb = + ps_ref_layer_data->ps_mb_info + + ((s_ref_array_offsets.i4_abscissa + (ps_mb_state->s_ref_array_dims.i4_abscissa / 2)) / + ps_layer_props->u4_mb_wd) + + ((s_ref_array_offsets.i4_ordinate + (ps_mb_state->s_ref_array_dims.i4_ordinate / 2)) / + ps_layer_props->u4_mb_ht) * + (s_frame_dims_in_mbs.i4_abscissa / 2); + + ps_res_pred_state->apf_residual_samplers[i]( + ps_mb_state->ps_ref_array_positions, ps_mb_state->ps_ref_array_phases, &s_inp, &s_out, + &ps_mem_store->s_scratch, UINT32_MAX, ps_ref_mb->u1_tx_size == 8); + } +} + +void isvce_get_ref_layer_mbtype_tx_size(WORD8 *pi1_ref_mb_modes, WORD32 i4_ref_mode_stride, + WORD32 i4_element_size, WORD32 i4_x_ref, WORD32 i4_y_ref, + WORD32 *pi4_mb_type, WORD32 *pi4_tx_size, + WORD32 i4_chroma_flag) +{ + WORD32 i4_mb_wd_sft, i4_mb_ht_sft; + WORD32 i4_mb_x, i4_mb_y; + WORD8 i1_mb_mode; + + if(i4_x_ref < 0) + { + i4_x_ref = 0; + } + if(i4_y_ref < 0) + { + i4_y_ref = 0; + } + + i4_mb_wd_sft = (MB_WIDTH_SHIFT - i4_chroma_flag); + i4_mb_ht_sft = (MB_HEIGHT_SHIFT - i4_chroma_flag); + i4_mb_x = (i4_x_ref >> i4_mb_wd_sft); + i4_mb_y = (i4_y_ref >> i4_mb_ht_sft); + + pi1_ref_mb_modes += (i4_mb_y * i4_ref_mode_stride * i4_element_size); + pi1_ref_mb_modes += (i4_mb_x * i4_element_size); + i1_mb_mode = *pi1_ref_mb_modes; + i1_mb_mode = (i1_mb_mode < 0) ? 
i1_mb_mode : SVC_EXTRACT_MB_MODE(*pi1_ref_mb_modes); + + if(i1_mb_mode <= SVC_INTER_MB) + { + *pi4_mb_type = SVC_INTER_MB; + *pi4_tx_size = GET_BIT_TX_SIZE(*pi1_ref_mb_modes, 1); + } + else + { + *pi4_mb_type = SVC_INTRA_MB; + *pi4_tx_size = 1; + } +} + +void isvce_ref_layer_ptr_incr(WORD8 *pi1_ref_mb_modes, WORD32 i4_ref_mode_stride, + WORD32 i4_element_size, WORD32 i4_x_offset, WORD32 i4_y_offset, + WORD32 i4_refary_wd, WORD32 i4_refary_ht, UWORD8 *pu1_ref_x_ptr_incr, + UWORD8 *pu1_ref_y_ptr_incr, WORD32 i4_chroma_flag) +{ + WORD32 i4_x, i4_y; + WORD32 i4_x_idx, i4_y_idx; + WORD32 i4_prev_x, i4_prev_y; + WORD32 i4_const_val; + WORD32 i4_pos_x, i4_pos_y; + WORD32 i4_trans_size; + WORD32 i4_mb_type, i4_tx_size; + WORD32 i4_act_ary_wd, i4_act_ary_ht; + WORD32 i4_and_const; + UWORD8 *pu1_incr_x, *pu1_incr_y; + + memset(pu1_ref_x_ptr_incr, 1, (i4_refary_wd * i4_refary_ht)); + memset(pu1_ref_y_ptr_incr, 1, (i4_refary_wd * i4_refary_ht)); + + i4_act_ary_wd = i4_refary_wd; + i4_act_ary_ht = i4_refary_ht; + + i4_x = 0; + i4_y = 0; + i4_prev_y = 0; + + if(0 == i4_chroma_flag) + { + do + { + WORD32 i4_x_ref, i4_y_ref; + WORD32 i4_idx; + WORD32 i4_wd, i4_ht; + WORD32 i4_max_pos_x, i4_max_pos_y; + + i4_prev_x = i4_x; + + i4_x_ref = i4_x_offset + i4_x; + i4_y_ref = i4_y_offset + i4_y; + + isvce_get_ref_layer_mbtype_tx_size(pi1_ref_mb_modes, i4_ref_mode_stride, + i4_element_size, i4_x_ref, i4_y_ref, &i4_mb_type, + &i4_tx_size, i4_chroma_flag); + + i4_trans_size = ((i4_tx_size + 1) << 2); + i4_const_val = i4_trans_size - 1; + i4_and_const = i4_const_val; + + /* Fill horizontal tx block edges of current reference mb with 0 */ + pu1_incr_x = pu1_ref_x_ptr_incr + i4_x; + pu1_incr_x += (i4_y * i4_refary_wd); + + i4_ht = (16 - (i4_y_ref & 0xF)); + i4_ht = MIN((i4_act_ary_ht - i4_y), i4_ht); + + i4_x_idx = i4_x; + + i4_pos_x = i4_x_ref & 0xF; + + i4_max_pos_x = 16; + i4_x += (16 - i4_pos_x); + + /* Get the transform block edge pos */ + i4_idx = (i4_const_val - (i4_pos_x & 
i4_and_const)); + + i4_x_idx += i4_idx; + + while((i4_pos_x < i4_max_pos_x) && (i4_x_idx < i4_act_ary_wd)) + { + WORD32 i4_i; + UWORD8 *pu1_incr; + + pu1_incr = pu1_incr_x + i4_idx; + + for(i4_i = 0; i4_i < i4_ht; i4_i++) + { /* Fill the block edge with 0s */ + *pu1_incr = 0; + pu1_incr += i4_refary_wd; + } + + i4_pos_x += i4_trans_size; + pu1_incr_x += i4_trans_size; + i4_x_idx += MIN(i4_trans_size, (i4_act_ary_wd - i4_x_idx)); + } + + /* Fill vertical tx block edges of current reference mb with 0 */ + pu1_incr_y = pu1_ref_y_ptr_incr + i4_prev_x; + pu1_incr_y += (i4_y * i4_refary_wd); + + i4_wd = (16 - (i4_x_ref & 0xF)); + i4_wd = MIN((i4_act_ary_wd - i4_prev_x), i4_wd); + + i4_y_idx = i4_y; + + i4_pos_y = i4_y_ref & 0xF; + + i4_max_pos_y = 16; + i4_y += (16 - i4_pos_y); + + /* Get the transform block edge pos */ + i4_idx = (i4_const_val - (i4_pos_y & i4_and_const)); + + i4_y_idx += i4_idx; + + while((i4_pos_y < i4_max_pos_y) && (i4_y_idx < i4_act_ary_ht)) + { + WORD32 i4_i; + UWORD8 *pu1_incr; + + pu1_incr = pu1_incr_y + i4_idx * i4_refary_wd; + + for(i4_i = 0; i4_i < i4_wd; i4_i++) + { /* Fill the block edge with 0s */ + *pu1_incr = 0; + pu1_incr++; + } + + i4_pos_y += i4_trans_size; + pu1_incr_y += i4_trans_size * i4_refary_wd; + i4_y_idx += MIN(i4_trans_size, (i4_act_ary_ht - i4_y_idx)); + } + + if(i4_x < i4_act_ary_wd) + { + i4_y = i4_prev_y; + } + else if(i4_y < i4_act_ary_ht) + { + i4_prev_y = i4_y; + i4_x = 0; + } + } while((i4_y < i4_act_ary_ht) || (i4_x < i4_act_ary_wd)); + } + else + { + i4_trans_size = 4; + i4_const_val = 3; + + do + { + WORD32 i4_x_ref, i4_y_ref; + WORD32 i4_idx; + WORD32 i4_wd, i4_ht; + WORD32 i4_max_pos_x, i4_max_pos_y; + + i4_prev_x = i4_x; + + i4_x_ref = i4_x_offset + i4_x; + i4_y_ref = i4_y_offset + i4_y; + + /* Fill horizontal tx block edges of current reference mb with 0 */ + pu1_incr_x = pu1_ref_x_ptr_incr + i4_x; + pu1_incr_x += (i4_y * i4_refary_wd); + + i4_ht = (8 - (i4_y_ref & 0x7)); + i4_ht = MIN((i4_act_ary_ht - i4_y), 
i4_ht); + + i4_x_idx = i4_x; + + i4_pos_x = i4_x_ref & 0x7; + + i4_max_pos_x = 8; + i4_x += (8 - i4_pos_x); + + /* Get the transform block edge pos */ + i4_idx = (i4_const_val - (i4_pos_x & 0x3)); + + i4_x_idx += i4_idx; + + while((i4_pos_x < i4_max_pos_x) && (i4_x_idx < i4_act_ary_wd)) + { + WORD32 i4_i; + UWORD8 *pu1_incr; + + pu1_incr = pu1_incr_x + i4_idx; + + for(i4_i = 0; i4_i < i4_ht; i4_i++) + { /* Fill the block edge with 0s */ + *pu1_incr = 0; + pu1_incr += i4_refary_wd; + } + + i4_pos_x += i4_trans_size; + pu1_incr_x += i4_trans_size; + i4_x_idx += MIN(i4_trans_size, (i4_act_ary_wd - i4_x_idx)); + } + + /* Fill vertical tx block edges of current reference mb with 0 */ + pu1_incr_y = pu1_ref_y_ptr_incr + i4_prev_x; + pu1_incr_y += (i4_y * i4_refary_wd); + + i4_wd = (8 - (i4_x_ref & 0x7)); + i4_wd = MIN((i4_act_ary_wd - i4_prev_x), i4_wd); + + i4_y_idx = i4_y; + + i4_pos_y = i4_y_ref & 0x7; + + i4_max_pos_y = 8; + i4_y += (8 - i4_pos_y); + + /* Get the transform block edge pos */ + i4_idx = (i4_const_val - (i4_pos_y & 0x3)); + + i4_y_idx += i4_idx; + + while((i4_pos_y < i4_max_pos_y) && (i4_y_idx < i4_act_ary_ht)) + { + WORD32 i4_i; + UWORD8 *pu1_incr; + + pu1_incr = pu1_incr_y + i4_idx * i4_refary_wd; + + for(i4_i = 0; i4_i < i4_wd; i4_i++) + { /* Fill the block edge with 0s */ + *pu1_incr = 0; + pu1_incr++; + } + + i4_pos_y += i4_trans_size; + pu1_incr_y += i4_trans_size * i4_refary_wd; + i4_y_idx += MIN(i4_trans_size, (i4_act_ary_ht - i4_y_idx)); + } + + if(i4_x < i4_act_ary_wd) + { + i4_y = i4_prev_y; + } + else if(i4_y < i4_act_ary_ht) + { + i4_prev_y = i4_y; + i4_x = 0; + } + } while((i4_y < i4_act_ary_ht) || (i4_x < i4_act_ary_wd)); + } +} + +void isvce_residual_reflayer_const(svc_res_pred_ctxt_t *ps_res_pred_ctxt, WORD16 *pi2_inp_data, + WORD32 i4_inp_data_stride, WORD8 *ps_ref_mb_mode, + WORD32 i4_ref_mb_mode_stride, WORD32 *pi4_refarr_wd, + WORD32 i4_chroma_flag) +{ + WORD8 *pi1_ref_mb_modes; + WORD32 i4_ref_mode_stride; + + WORD32 i4_x, i4_y; + 
WORD32 i4_ref_wd; + WORD32 i4_ref_ht; + WORD32 i4_x_offset; + WORD32 i4_y_offset; + WORD32 i4_refarray_wd; + WORD32 i4_refarray_ht; + + WORD16 *pi2_ref_array; + + res_pred_mb_state_t *ps_mb_states; + res_pred_layer_state_t *ps_layer_state; + + res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants; + res_pred_variables_t *ps_res_pred_variables = &ps_res_pred_ctxt->s_res_pred_variables; + res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state; + res_pred_mem_store_t *ps_mem_store = &ps_res_pred_state->s_mem_store; + svc_ilp_data_t *ps_svc_ilp_data = ps_res_pred_variables->ps_svc_ilp_data; + coordinates_t *ps_mb_pos = &ps_res_pred_variables->s_mb_pos; + + UWORD8 u1_spatial_layer_id = ps_res_pred_variables->u1_spatial_layer_id; + + ps_layer_state = &ps_res_pred_state->ps_layer_state[u1_spatial_layer_id]; + pi2_ref_array = (WORD16 *) ps_mem_store->s_scratch.pv_data; + + pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode; + i4_ref_mode_stride = i4_ref_mb_mode_stride; + + ASSERT(NULL != pi1_ref_mb_modes); + + { + WORD32 i4_base_width; + WORD32 i4_base_height; + + coordinates_t s_frame_dims, s_frame_dims_in_mbs; + + s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width; + s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height; + s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE; + s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE; + + ps_mb_states = i4_chroma_flag ? 
ps_layer_state->ps_chroma_mb_states + : ps_layer_state->ps_luma_mb_states; + + ps_mb_states += + ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa; + + i4_base_width = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_width; + i4_base_height = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_height; + + i4_ref_wd = i4_base_width >> i4_chroma_flag; + i4_ref_ht = i4_base_height >> i4_chroma_flag; + + i4_x_offset = ps_mb_states->s_offsets.i4_abscissa; + i4_y_offset = ps_mb_states->s_offsets.i4_ordinate; + i4_refarray_wd = ps_mb_states->s_ref_array_dims.i4_abscissa; + i4_refarray_ht = ps_mb_states->s_ref_array_dims.i4_ordinate; + } + + { + isvce_ref_layer_ptr_incr(pi1_ref_mb_modes, i4_ref_mode_stride, 1, i4_x_offset, i4_y_offset, + i4_refarray_wd, i4_refarray_ht, + ps_res_pred_state->pu1_ref_x_ptr_incr, + ps_res_pred_state->pu1_ref_y_ptr_incr, i4_chroma_flag); + } + + for(i4_y = 0; i4_y < i4_refarray_ht; i4_y++) + { + for(i4_x = 0; i4_x < i4_refarray_wd; i4_x++) + { + WORD32 i4_x_ref; + WORD32 i4_y_ref; + WORD32 i4_ref_mb_type, i4_ref_tx_size; + WORD16 *pi2_ref_data_byte; + WORD16 *pi2_ref_array_temp; + + i4_x_ref = MAX(0, MIN(i4_ref_wd - 1, i4_x + i4_x_offset)); + i4_y_ref = MAX(0, MIN(i4_ref_ht - 1, i4_y + i4_y_offset)); + + isvce_get_ref_layer_mbtype_tx_size(pi1_ref_mb_modes, i4_ref_mode_stride, 1, i4_x_ref, + i4_y_ref, &i4_ref_mb_type, &i4_ref_tx_size, + i4_chroma_flag); + + if(0 <= i4_x_offset) + { + i4_x_ref = i4_x_ref - i4_x_offset; + } + + if(0 <= i4_y_offset) + { + i4_y_ref = i4_y_ref - i4_y_offset; + } + + pi2_ref_array_temp = pi2_ref_array + i4_x; + pi2_ref_array_temp += i4_y * i4_refarray_wd; + + if(SVC_INTER_MB == i4_ref_mb_type) + { + pi2_ref_data_byte = pi2_inp_data + (i4_x_ref << i4_chroma_flag); + pi2_ref_data_byte += i4_y_ref * i4_inp_data_stride; + + *pi2_ref_array_temp = (WORD16) (*pi2_ref_data_byte); + } + else + { + *pi2_ref_array_temp = 0; + } + } + } + *pi4_refarr_wd = i4_refarray_wd; +} + 
+void isvce_interpolate_residual(svc_res_pred_ctxt_t *ps_res_pred_ctxt, WORD16 *pi2_out,
+                                WORD32 i4_out_stride, WORD32 i4_refarray_wd, WORD32 i4_chroma_flag,
+                                coordinates_t *ps_mb_pos)
+{
+    res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants;
+    res_pred_variables_t *ps_res_pred_variables = &ps_res_pred_ctxt->s_res_pred_variables;
+    res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state;
+    res_pred_mem_store_t *ps_mem_store = &ps_res_pred_state->s_mem_store;
+    /*
+     * Writes (MB_SIZE >> i4_chroma_flag) x (MB_SIZE >> i4_chroma_flag) samples
+     * to 'pi2_out'. The source is the reference array in the scratch buffer of
+     * the state, read with a row stride of 'i4_refarray_wd'. The pointer
+     * increment maps of the state are read with the same stride.
+     */
+    WORD32 i4_x, i4_y;
+    WORD32 i4_temp_array_ht;
+    WORD32 i4_mb_wd;
+    WORD32 i4_mb_ht;
+    WORD16 *pi2_ref_array;
+    UWORD8 *pu1_ref_x_ptr_incr, *pu1_ref_y_ptr_incr;
+
+    coordinates_t *ps_phase;
+    coordinates_t *ps_pos;
+    res_pred_mb_state_t *ps_mb_states;
+
+    coordinates_t s_frame_dims;
+    coordinates_t s_frame_dims_in_mbs;
+
+    UWORD8 u1_spatial_layer_id = ps_res_pred_variables->u1_spatial_layer_id;
+
+    svc_ilp_data_t *ps_svc_ilp_data = ps_res_pred_variables->ps_svc_ilp_data;
+
+    res_pred_mb_state_t *ps_mb_state;
+
+    s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width;
+    s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height;
+    s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE;
+    s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE;
+
+    pu1_ref_x_ptr_incr = ps_res_pred_state->pu1_ref_x_ptr_incr;
+    pu1_ref_y_ptr_incr = ps_res_pred_state->pu1_ref_y_ptr_incr;
+
+    ps_mb_states = i4_chroma_flag
+                       ? ps_res_pred_state->ps_layer_state[u1_spatial_layer_id].ps_chroma_mb_states
+                       : ps_res_pred_state->ps_layer_state[u1_spatial_layer_id].ps_luma_mb_states;
+
+    i4_mb_wd = MB_SIZE >> i4_chroma_flag;
+    i4_mb_ht = MB_SIZE >> i4_chroma_flag;
+
+    ps_mb_state = &ps_mb_states[ps_mb_pos->i4_abscissa +
+                                (ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa)];
+
+    ps_phase = ps_mb_state->ps_ref_array_phases;
+    ps_pos = ps_mb_state->ps_ref_array_positions;
+
+    i4_temp_array_ht = i4_mb_ht;
+
+    pi2_ref_array = (WORD16 *) ps_mem_store->s_scratch.pv_data;
+    /* One output sample per iteration of the inner loop */
+    for(i4_y = 0; i4_y < i4_temp_array_ht; i4_y++)
+    {
+        for(i4_x = 0; i4_x < i4_mb_wd; i4_x++)
+        {
+            WORD32 i4_i;
+            WORD32 i4_y_ref;
+            WORD32 i4_y_phase;
+            WORD32 i4_x_ref;
+            WORD32 i4_x_phase;
+            WORD32 i4_x_ref_round;
+            WORD16 *pi2_out_curr;
+            WORD32 ai4_temp_pred[2];
+            UWORD8 *pu1_ref_y_ptr_incr_temp;
+            WORD32 *pi4_temp_pred;
+            UWORD8 u1_incr_y;
+            WORD16 i2_res;
+
+            pi2_out_curr = pi2_out + (i4_x << i4_chroma_flag) + (i4_y * i4_out_stride);
+            /* Positions are stored per output sample. Phases are looked up
+             * with a period of 3: entries 0, 1, 2 supply the horizontal phase
+             * and entries 0, 3, 4 supply the vertical phase */
+            i4_y_ref = ps_pos[(i4_mb_wd * i4_y) + i4_x].i4_ordinate;
+            i4_y_phase = ps_phase[((i4_y % 3) > 0) * 2 + (i4_y % 3)].i4_ordinate;
+
+            i4_x_ref = ps_pos[(i4_mb_wd * i4_y) + i4_x].i4_abscissa;
+            i4_x_phase = ps_phase[i4_x % 3].i4_abscissa;
+
+            /* horizontal processing: rows i4_y_ref and i4_y_ref + 1 of the
+             * reference array are each filtered with weights (16 - phase) and
+             * phase. When the x increment map holds 0 at the position, both
+             * taps read the same sample, chosen by (phase >> 3) */
+            for(i4_i = 0; i4_i < 2; i4_i++)
+            {
+                UWORD8 *pu1_ref_x_ptr_incr_temp;
+                UWORD8 u1_incr;
+                WORD16 *pi2_ref_array_1, *pi2_ref_array_2;
+
+                pu1_ref_x_ptr_incr_temp = pu1_ref_x_ptr_incr + i4_x_ref;
+                pu1_ref_x_ptr_incr_temp += ((i4_y_ref + i4_i) * i4_refarray_wd);
+                u1_incr = *pu1_ref_x_ptr_incr_temp;
+
+                pi2_ref_array_1 = pi2_ref_array + i4_x_ref;
+                pi2_ref_array_1 += ((i4_y_ref + i4_i) * i4_refarray_wd);
+
+                if(!u1_incr)
+                {
+                    pi2_ref_array_1 += (i4_x_phase >> 3);
+                }
+
+                pi2_ref_array_2 = pi2_ref_array_1 + u1_incr;
+
+                ai4_temp_pred[i4_i] =
+                    (16 - i4_x_phase) * (*pi2_ref_array_1) + i4_x_phase * (*pi2_ref_array_2);
+            }
+
+            /* vertical processing: the two row results are combined in the
+             * same manner using the y increment map, then rounded and scaled
+             * down by 8 bits (4 bits per filtering direction) */
+            i4_x_ref_round = (i4_x_ref + (i4_x_phase >> 3));
+
+            pu1_ref_y_ptr_incr_temp =
+                pu1_ref_y_ptr_incr + i4_x_ref_round + (i4_y_ref * i4_refarray_wd);
+            u1_incr_y = *pu1_ref_y_ptr_incr_temp;
+
+            pi4_temp_pred = &ai4_temp_pred[0];
+            if(!u1_incr_y)
+            {
+                pi4_temp_pred += (i4_y_phase >> 3);
+            }
+            i2_res = (((16 - i4_y_phase) * pi4_temp_pred[0] +
+                       i4_y_phase * pi4_temp_pred[u1_incr_y] + 128) >>
+                      8);
+            *pi2_out_curr = i2_res;
+        }
+    }
+}
+/**
+*******************************************************************************
+*
+* @brief
+*  Computes the residual prediction of the current MB by first constructing
+*  the reference sample array and then interpolating it, for each component
+*
+* @param[in] ps_res_pred_ctxt
+*  Pointer to residual prediction context; the spatial layer id held in its
+*  variables must be greater than 0
+*
+*******************************************************************************
+*/
+void isvce_get_mb_residual_pred_non_dyadic(svc_res_pred_ctxt_t *ps_res_pred_ctxt)
+{
+    buffer_container_t s_inp;
+    buffer_container_t s_out;
+    coordinates_t s_frame_dims;
+    coordinates_t s_frame_dims_in_mbs;
+    coordinates_t s_ref_array_offsets;
+    res_pred_layer_state_t *ps_layer_state, *ps_ref_layer_state;
+    yuv_buf_props_t *ps_ref_residual_buf;
+    res_pred_mb_state_t *ps_luma_mb_state;
+    res_pred_mb_state_t *ps_chroma_mb_state;
+
+    WORD16 *pi2_inp, *pi2_out;
+    WORD32 i4_inp_stride, i4_out_stride;
+
+    res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants;
+    res_pred_variables_t *ps_res_pred_variables = &ps_res_pred_ctxt->s_res_pred_variables;
+    res_pred_outputs_t *ps_res_pred_outputs = &ps_res_pred_ctxt->s_res_pred_outputs;
+    res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state;
+    svc_ilp_data_t *ps_svc_ilp_data = ps_res_pred_variables->ps_svc_ilp_data;
+    coordinates_t *ps_mb_pos = &ps_res_pred_variables->s_mb_pos;
+
+    UWORD8 u1_spatial_layer_id = ps_res_pred_variables->u1_spatial_layer_id;
+
+    WORD32 i4_refarray_wd;
+
+    WORD32 i;
+
+    ASSERT(u1_spatial_layer_id > 0);
+
+    s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width;
+    s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height;
+    s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE;
+    s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE;
+
+    ps_layer_state = &ps_res_pred_state->ps_layer_state[u1_spatial_layer_id];
+    ps_ref_layer_state = &ps_res_pred_state->ps_layer_state[u1_spatial_layer_id - 1];
+    ps_ref_residual_buf = &ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1];
+    ps_luma_mb_state = ps_layer_state->ps_luma_mb_states + ps_mb_pos->i4_abscissa +
+                       ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa;
+    ps_chroma_mb_state = ps_layer_state->ps_chroma_mb_states + ps_mb_pos->i4_abscissa +
+                         ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa;
+    /* Chroma components share the interleaved UV buffers; V is addressed one
+     * sample after U through the '(V == i)' term below */
+    for(i = 0; i < NUM_COMPONENTS; i++)
+    {
+        res_pred_mb_state_t *ps_mb_state;
+
+        UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i));
+
+        ps_mb_state = u1_is_chroma ? ps_chroma_mb_state : ps_luma_mb_state;
+        /* NOTE(review): the offsets index the reference layer buffer, but
+         * are clamped with the dimensions of the current layer's residual
+         * buffer - confirm that this is intended */
+        s_ref_array_offsets.i4_abscissa =
+            MAX(0, MIN(ps_mb_state->s_offsets.i4_abscissa,
+                       (s_frame_dims.i4_abscissa >> u1_is_chroma) - 1));
+        s_ref_array_offsets.i4_ordinate =
+            MAX(0, MIN(ps_mb_state->s_offsets.i4_ordinate,
+                       (s_frame_dims.i4_ordinate >> u1_is_chroma) - 1));
+
+        s_inp = ps_ref_residual_buf->as_component_bufs[u1_is_chroma ? UV : Y];
+        s_inp.pv_data = ((WORD16 *) s_inp.pv_data) + (V == ((COMPONENT_TYPE) i)) +
+                        (s_ref_array_offsets.i4_abscissa << u1_is_chroma) +
+                        s_ref_array_offsets.i4_ordinate * s_inp.i4_data_stride;
+
+        s_out = ps_res_pred_outputs->s_res_pred.as_component_bufs[u1_is_chroma ? UV : Y];
+        s_out.pv_data = ((WORD16 *) s_out.pv_data) + (V == ((COMPONENT_TYPE) i));
+
+        pi2_inp = (WORD16 *) s_inp.pv_data;
+        pi2_out = (WORD16 *) s_out.pv_data;
+
+        i4_inp_stride = s_inp.i4_data_stride;
+        i4_out_stride = s_out.i4_data_stride;
+
+        /* ------- Constructing refSampleArray ----------------------- */
+        isvce_residual_reflayer_const(
+            ps_res_pred_ctxt, pi2_inp, i4_inp_stride, ps_ref_layer_state->pi1_mb_mode,
+            ps_ref_layer_state->i4_mb_mode_stride, &i4_refarray_wd, u1_is_chroma);
+
+        /* ---- Interpolation process for Residual prediction	 ------ */
+        isvce_interpolate_residual(ps_res_pred_ctxt, pi2_out, i4_out_stride, i4_refarray_wd,
+                                   u1_is_chroma, ps_mb_pos);
+    }
+}
+/**
+*******************************************************************************
+*
+* @brief
+*  Computes the sum of absolute values of (src - pred - res) over a block
+*
+* @param[in] ps_src
+*  Source block; samples are read as UWORD8
+*
+* @param[in] ps_pred
+*  Prediction block; samples are read as UWORD8
+*
+* @param[in] ps_res
+*  Residual prediction block; samples are read as WORD16
+*
+* @param[in] u4_mb_wd
+*  Width of the block
+*
+* @param[in] u4_mb_ht
+*  Height of the block
+*
+* @returns  The accumulated SAD
+*
+*******************************************************************************
+*/
+UWORD32 isvce_get_sad_with_residual_pred(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                         buffer_container_t *ps_res, UWORD32 u4_mb_wd,
+                                         UWORD32 u4_mb_ht)
+{
+    UWORD32 i, j;
+
+    UWORD32 u4_sad = 0;
+
+    for(i = 0; i < u4_mb_ht; i++)
+    {
+        for(j = 0; j < u4_mb_wd; j++)
+        {
+            WORD16 i2_src = ((UWORD8 *) ps_src->pv_data)[j + i * ps_src->i4_data_stride];
+            WORD16 i2_pred = ((UWORD8 *) ps_pred->pv_data)[j + i * ps_pred->i4_data_stride];
+            WORD16 i2_res = ((WORD16 *) ps_res->pv_data)[j + i * ps_res->i4_data_stride];
+
+            u4_sad += ABS(i2_src - i2_pred - i2_res);
+        }
+    }
+    return u4_sad;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to evaluate residual_prediction_flag
+*
+* @param[in] ps_res_pred_ctxt
+*  Pointer to residual prediction context; supplies the SAD function
+*
+* @param[in] ps_src
+*  Pointer to MB src buffers
+*
+* @param[in] ps_pred
+*  Pointer to MB pred buffers
+*
+* @param[in] ps_res
+*  Pointer to MB res buffers
+*
+* @param[out] pu4_res_pred_sad
+*  Output variable for SAD
+*
+* @param[out] pu1_residual_prediction_flag
+*  Output variable for residual_prediction_flag
+*
+* @param[in] u4_winning_sad
+*  Winning mode's SAD
+*
+* @notes The algorithm currently uses only luma for evaluating
+*  residual_prediction_flag.
+* +******************************************************************************* +*/ +void isvce_residual_pred_eval(svc_res_pred_ctxt_t *ps_res_pred_ctxt, yuv_buf_props_t *ps_src, + yuv_buf_props_t *ps_pred, yuv_buf_props_t *ps_res, + UWORD32 *pu4_res_pred_sad, UWORD8 *pu1_residual_prediction_flag, + UWORD32 u4_winning_sad) +{ + res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants; + res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state; + pu4_res_pred_sad[0] = ps_res_pred_state->pf_get_sad_with_residual_pred( + &ps_src->as_component_bufs[Y], &ps_pred->as_component_bufs[Y], + &ps_res->as_component_bufs[Y], MB_SIZE, MB_SIZE); + + pu1_residual_prediction_flag[0] = pu4_res_pred_sad[0] < u4_winning_sad; +} + +void isvce_update_res_pred_info(isvce_process_ctxt_t *ps_proc) +{ + if(ps_proc->s_svc_params.u1_num_spatial_layers > 1) + { + svc_res_pred_ctxt_t *ps_res_pred_ctxt = ps_proc->ps_res_pred_ctxt; + res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants; + res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state; + res_pred_layer_state_t *ps_layer_state = + &ps_res_pred_state->ps_layer_state[ps_proc->u1_spatial_layer_id]; + + WORD8 i1_is_intra = ps_proc->ps_mb_info->u1_is_intra; + + WORD8 *pi1_mb_mode = + &ps_layer_state->pi1_mb_mode[ps_proc->i4_mb_x + + (ps_proc->i4_mb_y * (ps_layer_state->i4_mb_mode_stride))]; + + if(ps_proc->ps_mb_info->u1_base_mode_flag == 1 && i1_is_intra) + { + *pi1_mb_mode = SVC_IBL_MB; + } + else + { + if(i1_is_intra) + { + *pi1_mb_mode = SVC_INTRA_MB; + } + else + { + *pi1_mb_mode = SVC_INTER_MB; + } + } + } +} diff --git a/encoder/svc/isvce_residual_pred.h b/encoder/svc/isvce_residual_pred.h new file mode 100644 index 0000000..d0ef076 --- /dev/null +++ b/encoder/svc/isvce_residual_pred.h @@ -0,0 +1,97 @@ +/****************************************************************************** + * + * Copyright (C) 
2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_residual_pred.h
+*
+* @brief
+*  Contains declarations of the structs and functions defined in
+*  isvce_residual_pred.c
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_RESIDUAL_PRED_H_
+#define _ISVCE_RESIDUAL_PRED_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_macros.h"
+#include "ih264_debug.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvce_structs.h"
+#include "isvce_structs.h" /* NOTE(review): repeats the previous include */
+
+/* Structs */
+typedef struct res_pred_constants_t
+{
+    void *pv_state; /* Opaque; isvce_residual_pred.c casts it to res_pred_state_t */
+} res_pred_constants_t;
+
+typedef struct res_pred_outputs_t
+{
+    yuv_buf_props_t s_res_pred; /* Residual prediction of the current MB */
+} res_pred_outputs_t;
+
+typedef struct res_pred_variables_t
+{
+    svc_ilp_data_t *ps_svc_ilp_data; /* Source of the per-layer residual buffers */
+
+    coordinates_t s_mb_pos; /* Position of the current MB, in MB units */
+
+    UWORD8 u1_spatial_layer_id; /* Layer being coded; must be > 0 to fetch a prediction */
+} res_pred_variables_t;
+
+typedef struct svc_res_pred_ctxt_t
+{
+    res_pred_constants_t s_res_pred_constants; /* Set up at init */
+
+    res_pred_variables_t s_res_pred_variables; /* Inputs read by the prediction functions */
+
+    res_pred_outputs_t s_res_pred_outputs; /* Written by the prediction functions */
+
+} svc_res_pred_ctxt_t;
+
+extern UWORD32 isvce_get_svc_res_pred_ctxt_size(UWORD8 u1_num_spatial_layers,
+                                                DOUBLE d_spatial_res_ratio, UWORD32 u4_wd,
+                                                UWORD32 u4_ht);
+
+extern void isvce_svc_res_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
+/* Residual prediction of one MB; asserts that the spatial layer id is > 0 */
+extern void isvce_get_mb_residual_pred(svc_res_pred_ctxt_t *ps_res_pred_ctxt);
+/* Variant that constructs and interpolates a reference sample array */
+extern void isvce_get_mb_residual_pred_non_dyadic(svc_res_pred_ctxt_t *ps_res_pred_ctxt);
+/* Sets the flag when the luma SAD with residual prediction is below u4_winning_sad */
+extern void isvce_residual_pred_eval(svc_res_pred_ctxt_t *ps_res_pred_ctxt, yuv_buf_props_t *ps_src,
+                                     yuv_buf_props_t *ps_pred, yuv_buf_props_t *ps_res,
+                                     UWORD32 *pu4_res_pred_sad,
+                                     UWORD8 *pu1_residual_prediction_flag, UWORD32 u4_winning_sad);
+/* Stores the current MB's mode in the MB mode map of its spatial layer */
+extern void isvce_update_res_pred_info(isvce_process_ctxt_t *ps_proc);
+
+#endif
diff --git a/encoder/svc/isvce_structs.h b/encoder/svc/isvce_structs.h
new file mode 100644
index 0000000..1acf2ed
--- /dev/null
+++ b/encoder/svc/isvce_structs.h
@@ -0,0 +1,2584 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_structs.h +* +* @brief +* Contains struct definitions used for SVC encoding +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_STRUCTS_H_ +#define _ISVCE_STRUCTS_H_ + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_inter_pred_filters.h" +#include "ithread.h" +#include "isvc_defs.h" +#include "isvc_mem_fns.h" +#include "isvc_cabac_tables.h" +#include "isvc_trans_quant_itrans_iquant.h" + +/* Dependencies of ime_structs.h */ +#include "ime_defs.h" +#include "ime_distortion_metrics.h" + +/* Dependencies of ih264e_cabac_structs.h */ +#include "ih264_cabac_tables.h" + +/* Dependencies of ih264e_structs.h */ +#include "ih264e_error.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264e_bitstream.h" +#include "ih264e_cabac_structs.h" +#include "ih264e_defs.h" +#include "ime_structs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" + +#include "ih264e_structs.h" +#include "isvce_cabac_structs.h" +#include "isvce_defs.h" +#include "isvce_downscaler.h" +#include "isvce_interface_structs.h" +#include "isvce_nalu_stat_aggregator.h" +#include "isvce_pred_structs.h" +#include "isvce_rc_utils.h" + +#include "irc_cntrl_param.h" /* NOTE(review): repeated; already listed with the ih264e_structs.h dependencies */ +#include "irc_frame_info_collector.h" /* NOTE(review): repeated; already listed with the ih264e_structs.h dependencies */ + +typedef struct svc_params_t +{ + /** + * Num Temporal Layers + */ + UWORD8 u1_num_temporal_layers; + + /** + * Num Spatial Layers + */ + UWORD8 u1_num_spatial_layers; + + /** + * Resolution ratio between spatial layers + */ + DOUBLE d_spatial_res_ratio; + +} svc_params_t; + +typedef struct svc_layer_data_t +{ + /** + * Array of structs that contain mode_info per MB for every MB per layer + */ + isvce_mb_info_t *ps_mb_info; + +
UWORD32 *pu4_num_pus_in_mb; + +} svc_layer_data_t; + +typedef struct svc_au_data_t +{ + /** + * Array of structs that contain layer-wise data used for svc prediction + */ + svc_layer_data_t *ps_svc_layer_data; + + /** + * Absolute POC for the current MV Bank + */ + WORD32 i4_abs_poc; + + /** + * Buffer Id + */ + WORD32 i4_buf_id; + +} svc_au_data_t; + +typedef struct isvce_inp_buf_t +{ + /* App's buffer */ + isvce_raw_inp_buf_t s_inp_props; + + /* A copy of SVC parameters */ + svc_params_t s_svc_params; + + /** + * Array of structs that contain properties of the buffers used for storing + * layer-wise YUV data + */ + yuv_buf_props_t as_layer_yuv_buf_props[MAX_NUM_SPATIAL_LAYERS]; + +} isvce_inp_buf_t; + +typedef struct mb_intra_modes_t +{ + UWORD8 au1_intra_modes[MAX_PU_IN_MB]; +} mb_intra_modes_t; + +typedef struct nbr_info_t +{ + isvce_mb_info_t *ps_top_row_mb_info; + + isvce_mb_info_t *ps_left_mb_info; + + mb_intra_modes_t *ps_top_mb_intra_modes; + + mb_intra_modes_t *ps_left_mb_intra_modes; + +} nbr_info_t; + +typedef struct svc_nbr_info_t +{ + /** + * Array of structs that contain properties of the buffers used for storing + * layer-wise neighbour info + */ + nbr_info_t *ps_layer_nbr_info; +} svc_nbr_info_t; + +typedef struct layer_resampler_props_t +{ + UWORD32 u4_shift_x; + + UWORD32 u4_shift_y; + + UWORD32 u4_scale_x; + + UWORD32 u4_scale_y; + + WORD32 i4_offset_x; + + WORD32 i4_offset_y; + + WORD32 i4_add_x; + + WORD32 i4_add_y; + + WORD32 i4_delta_x; + + WORD32 i4_delta_y; + + WORD32 i4_refphase_x; + + WORD32 i4_refphase_y; + + WORD32 i4_phase_x; + + WORD32 i4_phase_y; + + UWORD32 u4_sub_wd; + + UWORD32 u4_sub_ht; + + UWORD32 u4_mb_wd; + + UWORD32 u4_mb_ht; + +} layer_resampler_props_t; + +typedef struct svc_ilp_data_t +{ + /* Pointer to current AU buf */ + svc_au_data_t *ps_svc_au_data; + + /* Array of bufs corresponding to numSpatialLayers */ + layer_resampler_props_t *aps_layer_resampler_props[NUM_SP_COMPONENTS]; + + /* Array of bufs corresponding to 
numSpatialLayers */ + yuv_buf_props_t *ps_intra_recon_bufs; + + /* Array of bufs corresponding to numSpatialLayers */ + yuv_buf_props_t *ps_residual_bufs; +} svc_ilp_data_t; + +typedef struct ilp_mv_t +{ + isvce_enc_pu_mv_t as_mv[ENC_MAX_PU_IN_MB][NUM_PRED_DIRS]; + + MBTYPES_T e_mb_type; + + PRED_MODE_T ae_pred_mode[ENC_MAX_PU_IN_MB]; +} ilp_mv_t; + +typedef struct ilp_me_cands_t +{ + isvce_enc_pu_mv_t as_mv[MAX_PU_IN_MB + MAX_ILP_MV_IN_NBR_RGN][NUM_PRED_DIRS]; + + MBTYPES_T e_mb_type[MAX_PU_IN_MB + MAX_ILP_MV_IN_NBR_RGN]; + + PRED_MODE_T ae_pred_mode[MAX_PU_IN_MB + MAX_ILP_MV_IN_NBR_RGN]; + + UWORD32 u4_num_ilp_mvs; + + UWORD32 u4_num_ilp_mvs_incl_nbrs; +} ilp_me_cands_t; + +typedef struct isvce_cfg_params_t +{ + /** maximum width for which codec should request memory requirements */ + UWORD32 u4_max_wd; + + /** maximum height for which codec should request memory requirements */ + UWORD32 u4_max_ht; + + /** Maximum number of reference frames */ + UWORD32 u4_max_ref_cnt; + + /** Maximum number of reorder frames */ + UWORD32 u4_max_reorder_cnt; + + /** Maximum level supported */ + UWORD32 u4_max_level; + + /** Input color format */ + IV_COLOR_FORMAT_T e_inp_color_fmt; + + /** Flag to enable/disable - To be used only for debugging/testing */ + UWORD32 u4_enable_recon; + + /** Recon color format */ + IV_COLOR_FORMAT_T e_recon_color_fmt; + + /** Encoder Speed preset - Value between 0 (slowest) and 100 (fastest) */ + IVE_SPEED_CONFIG u4_enc_speed_preset; + + /** Rate control mode */ + IVE_RC_MODE_T e_rc_mode; + + /** Maximum frame rate to be supported */ + UWORD32 u4_max_framerate; + + /** Maximum bitrate to be supported */ + UWORD32 au4_max_bitrate[MAX_NUM_SPATIAL_LAYERS]; + + /** Maximum number of consecutive B frames */ + UWORD32 u4_num_bframes; + + /** Content type Interlaced/Progressive */ + IV_CONTENT_TYPE_T e_content_type; + + /** Maximum search range to be used in X direction */ + UWORD32 u4_max_srch_rng_x; + + /** Maximum search range to be used in Y direction 
*/ + UWORD32 u4_max_srch_rng_y; + + /** Slice Mode */ + IVE_SLICE_MODE_T e_slice_mode; + + /** Slice parameter */ + UWORD32 u4_slice_param; + + /** Processor architecture */ + IV_ARCH_T e_arch; + + /** SOC details */ + IV_SOC_T e_soc; + + /** Input width to be sent in bitstream */ + UWORD32 u4_disp_wd; + + /** Input height to be sent in bitstream */ + UWORD32 u4_disp_ht; + + /** Input width */ + UWORD32 u4_wd; + + /** Input height */ + UWORD32 u4_ht; + + /** Input stride */ + UWORD32 u4_strd; + + /** Source frame rate */ + UWORD32 u4_src_frame_rate; + + /** Target frame rate */ + UWORD32 u4_tgt_frame_rate; + + /** Target bitrate in kilobits per second */ + UWORD32 au4_target_bitrate[MAX_NUM_SPATIAL_LAYERS]; + + /** Force current frame type */ + IV_PICTURE_CODING_TYPE_T e_frame_type; + + /** Encoder mode */ + IVE_ENC_MODE_T e_enc_mode; + + /** Set initial Qp for I pictures */ + UWORD32 au4_i_qp[MAX_NUM_SPATIAL_LAYERS]; + + /** Set initial Qp for P pictures */ + UWORD32 au4_p_qp[MAX_NUM_SPATIAL_LAYERS]; + + /** Set initial Qp for B pictures */ + UWORD32 au4_b_qp[MAX_NUM_SPATIAL_LAYERS]; + + /** Set minimum Qp for I pictures */ + UWORD32 au4_i_qp_min[MAX_NUM_SPATIAL_LAYERS]; + + /** Set maximum Qp for I pictures */ + UWORD32 au4_i_qp_max[MAX_NUM_SPATIAL_LAYERS]; + + /** Set minimum Qp for P pictures */ + UWORD32 au4_p_qp_min[MAX_NUM_SPATIAL_LAYERS]; + + /** Set maximum Qp for P pictures */ + UWORD32 au4_p_qp_max[MAX_NUM_SPATIAL_LAYERS]; + + /** Set minimum Qp for B pictures */ + UWORD32 au4_b_qp_min[MAX_NUM_SPATIAL_LAYERS]; + + /** Set maximum Qp for B pictures */ + UWORD32 au4_b_qp_max[MAX_NUM_SPATIAL_LAYERS]; + + /** Adaptive intra refresh mode */ + IVE_AIR_MODE_T e_air_mode; + + /** Adaptive intra refresh period in frames */ + UWORD32 u4_air_refresh_period; + + /** VBV buffer delay */ + UWORD32 au4_vbv_buffer_delay[MAX_NUM_SPATIAL_LAYERS]; + + /** Number of cores to be used */ + UWORD32 u4_num_cores; + + /** ME speed preset - Value between 0 (slowest) and 100 
(fastest) */ + UWORD32 u4_me_speed_preset; + + /** Flag to enable/disable half pel motion estimation */ + UWORD32 u4_enable_hpel; + + /** Flag to enable/disable quarter pel motion estimation */ + UWORD32 u4_enable_qpel; + + /** Flag to enable/disable intra 4x4 analysis */ + UWORD32 u4_enable_intra_4x4; + + /** Flag to enable/disable intra 8x8 analysis */ + UWORD32 u4_enable_intra_8x8; + + /** Flag to enable/disable intra 16x16 analysis */ + UWORD32 u4_enable_intra_16x16; + + /** Flag to enable/disable fast SAD approximation */ + UWORD32 u4_enable_fast_sad; + + /** Flag to enable/disable alternate reference frames */ + UWORD32 u4_enable_alt_ref; + + /** Flag to enable/disable computation of SATQD in ME */ + UWORD32 u4_enable_satqd; + + /** Minimum SAD to search for */ + WORD32 i4_min_sad; + + /** Maximum search range in X direction for farthest reference */ + UWORD32 u4_srch_rng_x; + + /** Maximum search range in Y direction for farthest reference */ + UWORD32 u4_srch_rng_y; + + /** I frame interval */ + UWORD32 u4_i_frm_interval; + + /** IDR frame interval */ + UWORD32 u4_idr_frm_interval; + + /** Disable deblock level (0: Enable completely, 3: Disable completely) */ + UWORD32 u4_disable_deblock_level; + + /** Profile */ + IV_PROFILE_T e_profile; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + + /** Flag to say if the current config parameter set is valid + * Will be zero to start with and will be set to 1, when configured + * Once encoder uses the parameter set, this will be set to zero */ + UWORD32 u4_is_valid; + + /** Command associated with this config param set */ + ISVCE_CONTROL_API_COMMAND_TYPE_T e_cmd; + + /** Input width in mbs */ + UWORD32 i4_wd_mbs; + + /** Input height in mbs */ + UWORD32 i4_ht_mbs; + + /** entropy coding mode flag */
+ UWORD32 u4_entropy_coding_mode; + + /** enable weighted prediction */ + UWORD32 u4_weighted_prediction; + + /** Pic info type */ + UWORD32 u4_pic_info_type; + /** + * MB info type + */ + UWORD32 u4_isvce_mb_info_type; + + /** VUI structure */ + vui_t s_vui; + + /** SEI structure */ + sei_params_t s_sei; + + /** Flag to enable/disable VUI from header */ + UWORD32 u4_disable_vui; + + /** SVC params */ + svc_params_t s_svc_params; + + bool b_nalu_info_export_enable; + +} isvce_cfg_params_t; + +typedef struct mb_qp_ctxt_t +{ + UWORD8 u1_cur_mb_qp; + +} mb_qp_ctxt_t; + +typedef struct isvce_entropy_ctxt_t +{ + /** + * Pointer to the cabac context + */ + isvce_cabac_ctxt_t *ps_cabac; + + mb_qp_ctxt_t *ps_mb_qp_ctxt; + + /** + * start of frame / start of slice flag + */ + WORD32 i4_sof; + + /** + * end of frame / end of slice flag + */ + WORD32 i4_eof; + + /** + * generate header upon request + */ + WORD32 i4_gen_header; + + WORD32 i4_gen_subset_sps; + + /** + * Pointer to base of sequence parameter set structure array + */ + sps_t *ps_sps_base; + + /** + * Pointer to base of Picture parameter set structure array + */ + pps_t *ps_pps_base; + + /** + * Current slice idx + */ + WORD32 i4_cur_slice_idx; + + /** + * Points to the array of slice indices which is used to identify the + * independent slice to which each MB in a frame belongs. 
+ */ + UWORD8 *pu1_slice_idx; + + /** + * Pointer to base of svc_nalu_ext structure array + */ + svc_nalu_ext_t *ps_svc_nalu_ext_base; + + /** + * Pointer to base of subset sequence parameter set structure array + */ + subset_sps_t *ps_subset_sps_base; + + /** + * Pointer to base of slice header structure array + */ + slice_header_t *ps_slice_hdr_base; + + /** + * Pointer to base of SVC slice header structure array + */ + svc_slice_header_t *ps_svc_slice_hdr_base; + + /** + * entropy status + */ + UWORD8 *pu1_entropy_map; + + /** + * MB's x position within a picture in raster scan in MB units + */ + WORD32 i4_mb_x; + + /** + * MB's y position within a picture in raster scan in MB units + */ + WORD32 i4_mb_y; + + /** + * MB count - NOTE(review): this comment used to repeat "MB start address"; exact meaning to be confirmed + */ + WORD32 i4_mb_cnt; + + /** + * MB start address + */ + WORD32 i4_mb_start_add; + + /** + * MB end address + */ + WORD32 i4_mb_end_add; + + /** + * Input width in mbs + */ + WORD32 i4_wd_mbs; + + /** + * Input height in mbs + */ + WORD32 i4_ht_mbs; + + /** + * Bitstream structure + */ + bitstrm_t *ps_bitstrm; + +#if ENABLE_RE_ENC_AS_SKIP + bitstrm_t *ps_bitstrm_after_slice_hdr; +#endif + + /** + * transform_8x8_mode_flag + */ + WORD8 i1_transform_8x8_mode_flag; + + /** + * entropy_coding_mode_flag + */ + WORD8 u1_entropy_coding_mode_flag; + + /** + * Pointer to the top row nnz for luma + */ + UWORD8 (*pu1_top_nnz_luma)[4]; + + /** + * left nnz for luma + */ + UWORD32 u4_left_nnz_luma; + + /** + * Zero runs before for the mb (array of 16 entries, not a pointer) + */ + UWORD8 au1_zero_run[16]; + + /** + * Pointer to the top row nnz for chroma + */ + UWORD8 (*pu1_top_nnz_cbcr)[4]; + + /** + * left nnz for chroma - NOTE(review): declared UWORD8 despite the u4_ prefix + */ + UWORD8 u4_left_nnz_cbcr; + + /** + * Pointer to frame level mb subblock coeff data + */ + void *pv_pic_mb_coeff_data; + + /** + * Pointer to mb subblock coeff data and number of subblocks and scan idx + * Incremented each time a coded subblock is processed + */ + void *pv_mb_coeff_data; + + /** + * Pointer to frame level mb header data + */ + void
*pv_pic_mb_header_data; + + /** + * Pointer to mb header data and + * incremented each time a coded mb is encoded + */ + void *pv_mb_header_data; + + /** + * Error code during parse stage + */ + IH264E_ERROR_T i4_error_code; + + /** + * Void pointer to job context + */ + void *pv_proc_jobq, *pv_entropy_jobq; + + /** + * Flag to signal end of frame + */ + WORD32 i4_end_of_frame; + + /** + * Abs POC count of the frame + */ + WORD32 i4_abs_pic_order_cnt; + + /** + * mb skip run + */ + WORD32 *pi4_mb_skip_run; + + /** + * Flag to signal end of sequence + */ + UWORD32 u4_is_last; + + /** + * Lower 32bits of time-stamp corresponding to the buffer being encoded + */ + UWORD32 u4_timestamp_low; + + /** + * Upper 32bits of time-stamp corresponding to the buffer being encoded + */ + UWORD32 u4_timestamp_high; + + /** + * Current Picture count - used for synchronization + */ + WORD32 i4_pic_cnt; + + /** + * Number of bits consumed by header for I and P mb types + */ + UWORD32 u4_header_bits[MAX_MB_TYPE]; + + /** + * Number of bits consumed by residue for I and P mb types + */ + UWORD32 u4_residue_bits[MAX_MB_TYPE]; + + UWORD8 u1_spatial_layer_id; + +} isvce_entropy_ctxt_t; + +/** + ****************************************************************************** + * @brief Rate control related variables + ****************************************************************************** + */ +typedef struct isvce_rate_control_ctxt_t +{ + void *apps_rate_control_api[MAX_NUM_SPATIAL_LAYERS]; + + void *pps_frame_time; + + void *pps_time_stamp; + + void *pps_pd_frm_rate; + + /** + * frame rate pull down + */ + WORD32 pre_encode_skip[MAX_CTXT_SETS]; + + /** + * skip frame (cbr) + */ + WORD32 post_encode_skip[MAX_CTXT_SETS]; + + /** + * rate control type + */ + rc_type_e e_rc_type; + + /** + * pic type + */ + picture_type_e e_pic_type; + + /** + * rc utils context + */ + svc_rc_utils_ctxt_t s_rc_utils; + + /** + * intra cnt in previous frame + */ + WORD32 
ai4_num_intra_in_prev_frame[MAX_NUM_SPATIAL_LAYERS]; + + /** + * avg activity of prev frame + */ + WORD32 ai4_avg_activity[MAX_NUM_SPATIAL_LAYERS]; + +} isvce_rate_control_ctxt_t; + +typedef struct +{ + /** + * mb type and mode + */ + UWORD8 u1_mb_type_mode; + + /** + * CBP + */ + UWORD8 u1_cbp; + + /** + * MB qp delta + */ + UWORD8 u1_mb_qp; + + /** + * Element to align structure to 2 byte boundary + */ + UWORD8 u1_pad; + + UWORD8 u1_base_mode_flag; + + UWORD8 u1_residual_prediction_flag; + +} isvce_mb_hdr_common_t; + +/** +****************************************************************************** +* @brief macro block info for I4x4 MB +****************************************************************************** +*/ +typedef struct +{ + /** + * Common MB header params + */ + isvce_mb_hdr_common_t common; + + /** + * Sub block modes, 2 modes per byte + */ + UWORD8 au1_sub_blk_modes[8]; +} isvce_mb_hdr_i4x4_t; + +/** +****************************************************************************** +* @brief macro block info for I8x8 MB +****************************************************************************** +*/ +typedef struct +{ + /** + * Common MB header params + */ + isvce_mb_hdr_common_t common; + + /** + * Sub block modes, 2 modes per byte + */ + UWORD8 au1_sub_blk_modes[2]; +} isvce_mb_hdr_i8x8_t; + +/** +****************************************************************************** +* @brief macro block info for I16x16 MB +****************************************************************************** +*/ +typedef struct +{ + /** + * Common MB header params + */ + isvce_mb_hdr_common_t common; + +} isvce_mb_hdr_i16x16_t; + +/** +****************************************************************************** +* @brief macro block info for P16x16 MB +****************************************************************************** +*/ +typedef struct +{ + /** + * Common MB header params + */ + isvce_mb_hdr_common_t common; + + /** + * MV + */ + WORD16 
ai2_mvd[2]; + + UWORD8 u1_mvp_idx; +} isvce_mb_hdr_p16x16_t; + +/** +****************************************************************************** +* @brief macro block info for PSKIP MB +****************************************************************************** +*/ +typedef struct +{ + /** + * Common MB header params + */ + isvce_mb_hdr_common_t common; + +} isvce_mb_hdr_pskip_t; + +/** +****************************************************************************** +* @brief macro block info for B16x16 MB +****************************************************************************** +*/ +typedef struct +{ + /** + * Common MB header params + */ + isvce_mb_hdr_common_t common; + + /** + * MV + */ + WORD16 ai2_mvd[NUM_PRED_DIRS][2]; + + UWORD8 au1_mvp_idx[NUM_PRED_DIRS]; +} isvce_mb_hdr_b16x16_t; + +/** +****************************************************************************** +* @brief macro block info for BDIRECT MB +****************************************************************************** +*/ +typedef struct +{ + /** + * Common MB header params + */ + isvce_mb_hdr_common_t common; + +} isvce_mb_hdr_bdirect_t; + +/** +****************************************************************************** +* @brief macro block info for BSKIP MB +****************************************************************************** +*/ +typedef struct +{ + /** + * Common MB header params + */ + isvce_mb_hdr_common_t common; + +} isvce_mb_hdr_bskip_t; + +/** +****************************************************************************** +* @brief macro block info for IBL MB +****************************************************************************** +*/ +typedef struct isvce_mb_hdr_base_mode_t +{ + /** + * Common MB header params + */ + isvce_mb_hdr_common_t common; + +} isvce_mb_hdr_base_mode_t; + +/** +****************************************************************************** +* @brief Union of mb_hdr structures for size calculation +* and to access first few
common elements +****************************************************************************** +*/ + +typedef union isvce_mb_hdr_t +{ + isvce_mb_hdr_i4x4_t mb_hdr_i4x4; + isvce_mb_hdr_i8x8_t mb_hdr_i8x8; + isvce_mb_hdr_i16x16_t mb_hdr_i16x16; + isvce_mb_hdr_p16x16_t mb_hdr_p16x16; + isvce_mb_hdr_pskip_t mb_hdr_pskip; + isvce_mb_hdr_b16x16_t mb_hdr_b16x16; + isvce_mb_hdr_bdirect_t mb_hdr_bdirect; + isvce_mb_hdr_bskip_t mb_hdr_bskip; + isvce_mb_hdr_base_mode_t mb_hdr_base_mode; +} isvce_mb_hdr_t; + +typedef struct isvce_bs_ctxt_t +{ + /** + * MB's x position within a picture in raster scan in MB units + */ + WORD32 i4_mb_x; + + /** + * MB's y position within a picture in raster scan in MB units + */ + WORD32 i4_mb_y; + + /** + * MB's x position within a Slice in raster scan in MB units + */ + WORD32 i4_mb_slice_x; + + /** + * MB's y position within a Slice in raster scan in MB units + */ + WORD32 i4_mb_slice_y; + + /** + * Vertical strength, Two bits per edge. + * Stored in format. BS[15] | BS[14] | .. |BS[0] + */ + UWORD32 *pu4_pic_vert_bs; + + UWORD32 *pu4_intra_base_vert_bs; + + /** + * Boundary strength, Two bits per edge. + * Stored in format. BS[15] | BS[14] | .. |BS[0] + */ + UWORD32 *pu4_pic_horz_bs; + + UWORD32 *pu4_intra_base_horz_bs; + + /** + * Qp array stored for each mb + */ + UWORD8 *pu1_pic_qp; + +} isvce_bs_ctxt_t; + +typedef struct isvce_deblk_ctxt_t +{ + /** + * MB's x position within a picture in raster scan in MB units + */ + WORD32 i4_mb_x; + + /** + * MB's y position within a picture in raster scan in MB units + */ + WORD32 i4_mb_y; + + /** + * structure that contains BS and QP frame level arrays + */ + isvce_bs_ctxt_t s_bs_ctxt; + + /* + * Recon Buffers + */ + yuv_buf_props_t s_rec_pic_buf_props; + + /** + * Points to the array of slice indices which is used to identify the slice + * to which each MB in a frame belongs. 
+ */ + UWORD8 *pu1_slice_idx; + +} isvce_deblk_ctxt_t; + +/** +************************************************************************** +* @brief isvce_me_ctxt_t +* +* Structure encapsulating the parameters used in the motion estimation +* context +************************************************************************** +*/ +typedef struct isvce_me_ctxt_t +{ + /** + * Ref pointer to current MB luma for each ref list + */ + UWORD8 *apu1_ref_buf_luma[MAX_NUM_REFLIST]; + + /** + * Src pointer to current MB luma + */ + UWORD8 *pu1_src_buf_luma; + + /** + * source stride + * (strides for luma and chroma are the same) + */ + WORD32 i4_src_strd; + + /** + * recon stride + * (strides for luma and chroma are the same) + */ + WORD32 ai4_rec_strd[MAX_NUM_REFLIST]; + + /** + * Offset for half pel x plane from the pic buf + */ + UWORD32 u4_half_x_offset; + + /** + * Offset for half pel y plane from half x plane + */ + UWORD32 u4_half_y_offset; + + /** + * Offset for half pel xy plane from half y plane + */ + UWORD32 u4_half_xy_offset; + + /** + * Search range in the X, Y axis in terms of pixels + */ + WORD32 ai2_srch_boundaries[2]; + + /** + * Search range in the north direction in terms of pixels + */ + WORD32 i4_srch_range_n; + + /** + * Search range in the south direction in terms of pixels + */ + WORD32 i4_srch_range_s; + + /** + * Search range in the east direction in terms of pixels + */ + WORD32 i4_srch_range_e; + + /** + * Search range in the west direction in terms of pixels + */ + WORD32 i4_srch_range_w; + + /** + * left mb motion vector + */ + ime_mv_t s_left_mv; + + /** + * top left mb motion vector + */ + ime_mv_t s_top_left_mv; + + /* + * ilp MVs for ME candidates * + */ + ilp_me_cands_t *ps_ilp_me_cands; + + /** + * Number of valid candidates for the Initial search position + */ + UWORD32 u4_num_candidates[MAX_NUM_REFLIST + 1]; + + /** + * Motion vector predictors derived from neighboring + * blocks for each of the six block partitions + */ + ime_mv_t 
as_mv_init_search[MAX_NUM_REFLIST + 1][MAX_FPEL_SEARCH_CANDIDATES]; + + /** + * mv bits + */ + UWORD8 *pu1_mv_bits; + + /** + * lambda (lagrange multiplier for cost computation) + */ + UWORD32 u4_lambda_motion; + + /** + * enable fast sad computation + */ + UWORD32 u4_enable_fast_sad; + + /* + * Enable SKIP block prediction based on SATQD + */ + UWORD32 u4_enable_stat_sad; + + /* + * Minimum distortion to search for + * */ + WORD32 i4_min_sad; + + /* + * Signal that minimum sad has been reached in ME + * */ + UWORD32 u4_min_sad_reached; + + /** + * Flag to enable/disable half pel motion estimation + */ + UWORD32 u4_enable_hpel; + + /** + * Diamond search Iteration Max Cnt + */ + UWORD32 u4_num_layers; + + /** + * encoder me speed + */ + UWORD32 u4_me_speed_preset; + + UWORD32 u4_left_is_intra; + + UWORD32 u4_left_is_skip; + + /* skip_type can be PREDL0, PREDL1 or BIPRED */ + WORD32 i4_skip_type; + + /* Biasing given for skip prediction */ + WORD32 i4_skip_bias[2]; + + /** + * Structure to store the MB partition info + * We need 1(L0)+1(L1)+1(bi) + */ + mb_part_ctxt as_mb_part[MAX_NUM_REFLIST + 1]; + /* + * Threshold to compare the sad with + */ + UWORD16 *pu2_sad_thrsh; + + /** + * fn ptrs for compute sad routines + */ + ime_compute_sad_ft *pf_ime_compute_sad_16x16[2]; + ime_compute_sad_ft *pf_ime_compute_sad_16x8; + ime_compute_sad4_diamond *pf_ime_compute_sad4_diamond; + ime_compute_sad3_diamond *pf_ime_compute_sad3_diamond; + ime_compute_sad2_diamond *pf_ime_compute_sad2_diamond; + ime_sub_pel_compute_sad_16x16_ft *pf_ime_sub_pel_compute_sad_16x16; + + /* + * Function pointers for SATQD + */ + ime_compute_sad_stat *pf_ime_compute_sad_stat_luma_16x16; + + /** + * Qp + */ + UWORD8 u1_mb_qp; + + /* + * Buffers for holding subpel and bipred temp buffers + */ + UWORD8 *apu1_subpel_buffs[SUBPEL_BUFF_CNT]; + + WORD32 u4_subpel_buf_strd; + + /* + * Buffer to store the best halfpel plane + */ + UWORD8 *pu1_hpel_buf; + +} isvce_me_ctxt_t; + +typedef struct
isvce_mb_info_nmb_t +{ + UWORD32 u4_mb_type; + UWORD32 u4_min_sad; + UWORD32 u4_min_sad_reached; + WORD32 i4_mb_cost; + WORD32 i4_mb_distortion; + + isvce_enc_pu_mv_t as_skip_mv[4]; + + isvce_enc_pu_mv_t as_pred_mv[2]; + + block_neighbors_t s_ngbr_avbl; + + /* + * Buffer to hold best subpel buffer in each MB of NMB + */ + UWORD8 *pu1_best_sub_pel_buf; + + /* + * Stride for subpel buffer + */ + UWORD32 u4_bst_spel_buf_strd; + +} isvce_mb_info_nmb_t; + +typedef struct isvce_process_ctxt_t +{ + svc_params_t s_svc_params; + + /* Kept as void * to resolve the circular dependency with svc_ilp_mv_ctxt_t */ + void *ps_svc_ilp_mv_ctxt; + + /* Kept as void * to resolve the circular dependency with svc_res_pred_ctxt_t */ + void *ps_res_pred_ctxt; + + /* Kept as void * to resolve the circular dependency with svc_intra_pred_ctxt_t */ + void *ps_intra_pred_ctxt; + + /* Kept as void * to resolve the circular dependency with svc_sub_pic_rc_ctxt_t */ + void *ps_sub_pic_rc_ctxt; + + yuv_buf_props_t *ps_mb_pred_buf; + + yuv_buf_props_t *ps_mb_res_buf; + + ilp_mv_t *ps_ilp_mv; + + /** + * entropy context + */ + isvce_entropy_ctxt_t s_entropy; + + /** + * me context + */ + isvce_me_ctxt_t s_me_ctxt; + + /* Kept as void * to resolve the circular dependency with isvce_codec_t */ + void *ps_codec; + + /** + * N mb process context + */ + n_mb_process_ctxt_t s_n_mb_ctxt; + + /* + * Src Buffers + */ + yuv_buf_props_t s_src_buf_props; + + /* + * Recon Buffers + */ + yuv_buf_props_t s_rec_buf_props; + + /* + * Reference Frame Buffers + */ + yuv_buf_props_t as_ref_buf_props[MAX_REF_PIC_CNT]; + + /* + * Src Buffers + */ + yuv_buf_props_t s_src_pic_buf_props; + + /* + * Recon Buffers + */ + yuv_buf_props_t s_rec_pic_buf_props; + + /* + * Reference Frame Buffers + */ + yuv_buf_props_t as_ref_pic_buf_props[MAX_REF_PIC_CNT]; + + /** + * Pointer to ME NMB info + */ + isvce_mb_info_nmb_t *ps_nmb_info; + + isvce_mb_info_nmb_t *ps_cur_mb; + + /** + * Offset for half pel x plane from the pic buf + */ + UWORD32 u4_half_x_offset; + + /** + * Offset for half pel y plane from half x plane + */ + UWORD32
u4_half_y_offset; + + /** + * Offset for half pel xy plane from half y plane + */ + UWORD32 u4_half_xy_offset; + + /** + * pred buffer pointer (temp buffer 1) + */ + UWORD8 *pu1_pred_mb; + + /** + * pred buffer pointer (prediction buffer for intra 16x16 + */ + UWORD8 *pu1_pred_mb_intra_16x16; + + /** + * pred buffer pointer (prediction buffer for intra 16x16_plane + */ + UWORD8 *pu1_pred_mb_intra_16x16_plane; + + /** + * pred buffer pointer (prediction buffer for intra chroma + */ + UWORD8 *pu1_pred_mb_intra_chroma; + + /** + * pred buffer pointer (prediction buffer for intra chroma plane + */ + UWORD8 *pu1_pred_mb_intra_chroma_plane; + + /** + * temp. reference buffer ptr for intra 4x4 when rdopt is on + */ + UWORD8 *pu1_ref_mb_intra_4x4; + + /** + * prediction buffer stride + */ + WORD32 i4_pred_strd; + + /** + * transform buffer pointer (temp buffer 2) + */ + WORD16 *pi2_res_buf; + + /** + * temp. transform buffer ptr for intra 4x4 when rdopt is on + */ + WORD16 *pi2_res_buf_intra_4x4; + + /** + * transform buffer stride + */ + WORD32 i4_res_strd; + + /** + * scratch buffer for inverse transform (temp buffer 3) + */ + void *pv_scratch_buff; + + /** + * frame num + */ + WORD32 i4_frame_num; + + /** + * start address of frame / sub-frame + */ + WORD32 i4_frame_strt_add; + + /** + * IDR pic + */ + UWORD32 u4_is_idr; + + /** + * idr_pic_id + */ + UWORD32 u4_idr_pic_id; + + /** + * Input width in mbs + */ + WORD32 i4_wd_mbs; + + /** + * Input height in mbs + */ + WORD32 i4_ht_mbs; + + /** + * slice_type + */ + WORD32 i4_slice_type; + + /** + * Current slice idx + */ + WORD32 i4_cur_slice_idx; + + /** + * MB's x position within a picture in raster scan in MB units + */ + WORD32 i4_mb_x; + + /** + * MB's y position within a picture in raster scan in MB units + */ + WORD32 i4_mb_y; + + /** + * MB's x position within a Slice in raster scan in MB units + */ + WORD32 i4_mb_slice_x; + + /** + * MB's y position within a Slice in raster scan in MB units + */ + WORD32 
i4_mb_slice_y; + + /** + * mb neighbor availability pointer + */ + block_neighbors_t *ps_ngbr_avbl; + + /** + * lambda (lagrange multiplier for cost computation) + */ + UWORD32 u4_lambda; + + /** + * mb distortion + */ + WORD32 i4_mb_distortion; + + /** + * mb cost + */ + WORD32 i4_mb_cost; + + /********************************************************************/ + /* i4_ngbr_avbl_16x16_mb - ngbr avbl of curr mb */ + /* ai4_neighbor_avail_8x8_subblks - ngbr avbl of all 8x8 sub blocks of curr mb */ + /* au1_ngbr_avbl_4x4_subblks - ngbr avbl of all 4x4 sub blocks of curr mb */ + /* i4_chroma_neighbor_avail_8x8_mb - chroma ngbr avbl of curr mb */ + /********************************************************************/ + WORD32 i4_ngbr_avbl_16x16_mb; + WORD32 ai4_neighbor_avail_8x8_subblks[4]; + UWORD8 au1_ngbr_avbl_4x4_subblks[16]; + WORD32 i4_chroma_neighbor_avail_8x8_mb; + + /** + * array to store the mode of mb sub blocks + */ + UWORD8 au1_intra_luma_mb_4x4_modes[16]; + + /** + * array to store the predicted mode of mb sub blks + */ + UWORD8 au1_predicted_intra_luma_mb_4x4_modes[16]; + + /** + * macro block intra 16x16 mode + */ + UWORD8 u1_l_i16_mode; + + /** + * array to store the mode of the macro block intra 8x8 4 modes + */ + UWORD8 au1_intra_luma_mb_8x8_modes[4]; + + /** + * intra chroma mb mode + */ + UWORD8 u1_c_i8_mode; + + /********************************************************************/ + /* array to store pixels from the neighborhood for intra prediction */ + /* i16 - 16 left pels + 1 top left pel + 16 top pels = 33 pels */ + /* i8 - 8 lpels + 1 tlpels + 8 tpels + 8 tr pels = 25 pels */ + /* i4 - 4 lpels + 1 tlpels + 4 tpels + 4 tr pels = 13 pels */ + /* ic - (8 left pels + 1 top left pel + 8 top pels) * 2 */ + /********************************************************************/ + UWORD8 au1_ngbr_pels[34]; + + /** + * array for 8x8 intra pels filtering (temp buff 4) + */ + UWORD8 au1_neighbor_pels_i8x8_unfiltered[25]; + + /** + * Number of sub partitions in the inter pred MB + */ +
UWORD32 u4_num_sub_partitions; + + /** + * Pointer to hold num PUs each MB in a picture + */ + UWORD32 *pu4_mb_pu_cnt; + + /** + * Pointer to the array of structures having motion vectors, size + * and position of sub partitions + */ + isvce_mb_info_t *ps_mb_info; + + /** + * Pointer to the pu of current co-located MB in list 1 + */ + isvce_mb_info_t *ps_col_mb; + + /** + * predicted motion vector + */ + isvce_enc_pu_mv_t *ps_skip_mv; + + /** + * predicted motion vector + */ + isvce_enc_pu_mv_t *ps_pred_mv; + + /** + * top row mb syntax information base + * In normal working scenarios, for a given context set, + * the mb syntax info pointer is identical across all process threads. + * But when the hard bound on slices are enabled, in multi core, frame + * is partitioned in to sections equal to set number of cores and each + * partition is run independently. In this scenario, a ctxt set will alone + * appear to run multiple frames at a time. For this to occur, the common + * pointers across the proc ctxt should disappear. + * + * This is done by allocating MAX_PROCESS_THREADS memory and distributing + * across individual ctxts when byte bnd per slice is enabled. 
+ */ + svc_nbr_info_t s_nbr_info_base; + + nbr_info_t s_nbr_info; + + /** + * mb neighbor availability pointer + */ + block_neighbors_t s_ngbr_avbl; + + /** + * coded block pattern + */ + UWORD32 u4_cbp; + + /** + * number of non zero coeffs + */ + UWORD32 au4_nnz[5]; + + UWORD8 au1_chroma_nnz[2 * (NUM_4x4_IN_8x8 + 1)]; + + /** + * number of non zero coeffs for intra 4x4 when rdopt is on + */ + UWORD32 au4_nnz_intra_4x4[4]; + + /** + * frame qp & mb qp + */ + UWORD8 u1_frame_qp; + + UWORD8 u1_mb_qp; + + /** + * quantization parameters for luma & chroma planes + */ + quant_params_t *ps_qp_params[3]; + + /** + * Pointer frame level mb subblock coeff data + */ + void *pv_pic_mb_coeff_data; + + /** + * Pointer to mb subblock coeff data and number of subblocks and scan idx + * Incremented each time a coded subblock is processed + */ + void *pv_mb_coeff_data; + + /** + * Pointer frame level mb header data + */ + void *pv_pic_mb_header_data; + + /** + * Pointer to mb header data and + * incremented each time a coded mb is encoded + */ + void *pv_mb_header_data; + + /** + * Signal that pic_init is called first time + */ + WORD32 i4_first_pic_init; + + /** + * Current MV Bank's buffer ID + */ + WORD32 i4_cur_mv_bank_buf_id; + + /** + * Void pointer to job context + */ + void *pv_proc_jobq, *pv_entropy_jobq; + + /** + * Number of MBs to be processed in the current Job + */ + WORD32 i4_mb_cnt; + + /** + * ID for the current context - Used for debugging + */ + WORD32 i4_id; + + /** + * Pointer to current picture buffer structure + */ + svc_au_buf_t *ps_cur_pic; + + /** + * Pointer to current picture's mv buffer structure + */ + svc_au_data_t *ps_cur_mv_buf; + + /** + * Flag to indicate if ps_proc was initialized at least once in a frame. 
+ * This is needed to handle cases where a core starts to handle format + * conversion jobs directly + */ + WORD32 i4_init_done; + + /** + * Process status: one byte per MB + */ + UWORD8 *pu1_proc_map; + + /** + * Deblk status: one byte per MB + */ + UWORD8 *pu1_deblk_map; + + /** + * Process status: one byte per MB + */ + UWORD8 *pu1_me_map; + + /* + * Intra refresh mask. + * Indicates if an Mb is coded in intra mode within the current AIR interval + * NOTE Refreshes after each AIR period + * NOTE The map is shared between process + */ + UWORD8 *pu1_is_intra_coded; + + /** + * Disable deblock level (0: Enable completely, 3: Disable completely + */ + UWORD32 u4_disable_deblock_level; + + /** + * Pointer to the structure that contains deblock context + */ + isvce_deblk_ctxt_t s_deblk_ctxt; + + /** + * Points to the array of slice indices which is used to identify the + * independent slice to which each MB in a frame belongs. + */ + UWORD8 *pu1_slice_idx; + + /** + * Pointer to base of svc_nalu_ext structure array + */ + svc_nalu_ext_t *ps_svc_nalu_ext_base; + + /** + * Pointer to base of subset sequence parameter set structure array + */ + subset_sps_t *ps_subset_sps_base; + + /** + * Pointer to base of slice header structure array + */ + slice_header_t *ps_slice_hdr_base; + + /** + * Pointer to base of SVC slice header structure array + */ + svc_slice_header_t *ps_svc_slice_hdr_base; + + /** + * Number of mb's to process in one loop + */ + WORD32 i4_nmb_ntrpy; + + /** + * Number of mb's to process in one loop + */ + UWORD32 u4_nmb_me; + + /** + * Structure for current input buffer + */ + isvce_inp_buf_t s_inp_buf; + + /** + * api call cnt + */ + WORD32 i4_encode_api_call_cnt; + + /** + * Current Picture count - used for synchronization + */ + WORD32 i4_pic_cnt; + + /** + * Intermediate buffer for interpred leaf level functions + */ + WORD32 ai16_pred1[HP_BUFF_WD * HP_BUFF_HT]; + + /** + * Reference picture for the current picture + * TODO: Only 2 reference assumed 
currently + */ + svc_au_buf_t *aps_ref_pic[MAX_REF_PIC_CNT]; + + /** + * Reference MV buff for the current picture + */ + svc_au_data_t *aps_mv_buf[MAX_REF_PIC_CNT]; + + /** + * frame info used by RC + */ + frame_info_t s_frame_info; + + /* + * NOTE NOT PERSISTENT INSIDE FUNCTIONS + * Min sad for current MB + * will be populated initially + * Once a sad less than eq to u4_min_sad is reached, the value will be copied + * to the variable + */ + UWORD32 u4_min_sad; + + /* + * indicates whether we have reached minimum sad or not + */ + UWORD32 u4_min_sad_reached; + + /** + * Current error code + */ + WORD32 i4_error_code; + + /* + * Enables or disables computation of recon + */ + UWORD32 u4_compute_recon; + + /* + * Temporary buffers to be used for subpel computation + */ + UWORD8 *apu1_subpel_buffs[SUBPEL_BUFF_CNT]; + + /* + * Buffer holding best sub pel values + */ + UWORD8 *pu1_best_subpel_buf; + + /* + * Stride for buffer holding best sub pel + */ + UWORD32 u4_bst_spel_buf_strd; + + /* + * SVC spatial layer ID + */ + UWORD8 u1_spatial_layer_id; +} isvce_process_ctxt_t; + +typedef UWORD8 FT_CORE_CODING(isvce_process_ctxt_t *ps_proc); + +typedef WORD32 FT_FIND_SKIP_PARAMS(isvce_process_ctxt_t *, WORD32); + +typedef void FT_ME_ALGORITHM(isvce_process_ctxt_t *); + +typedef struct enc_loop_fxns_t +{ + /** + * luma core coding function pointer + */ + FT_CORE_CODING *apf_luma_energy_compaction[MAX_MBTYPES]; + + /** + * chroma core coding function pointer + */ + FT_CORE_CODING *apf_chroma_energy_compaction[2]; + + /** + * forward transform for intra blk of mb type 16x16 + */ + FT_LUMA_16X16_RESI_TRANS_DCTRANS_QUANT + *pf_resi_trans_dctrans_quant_16x16; + + /** + * inverse transform for intra blk of mb type 16x16 + */ + FT_LUMA_16X16_IDCTRANS_IQUANT_ITRANS_RECON + *pf_idctrans_iquant_itrans_recon_16x16; + + /** + * forward transform for 4x4 blk luma + */ + FT_RESI_TRANS_QUANT *apf_resi_trans_quant_4x4[NUM_RESI_TRANS_QUANT_VARIANTS]; + + /** + * forward transform for 4x4 blk
luma + */ + FT_RESI_TRANS_QUANT + *apf_resi_trans_quant_chroma_4x4[NUM_RESI_TRANS_QUANT_VARIANTS]; + + /* + * hadamard transform and quant for a 4x4 block + */ + FT_HADAMARD_QUANT *pf_hadamard_quant_4x4; + + /* + * hadamard transform and quant for a 4x4 block + */ + FT_HADAMARD_QUANT *pf_hadamard_quant_2x2_uv; + + /** + * inverse transform for 4x4 blk + */ + FT_IQ_IT_RECON *apf_iquant_itrans_recon_4x4[NUM_IQ_IT_RECON_VARIANTS]; + + /** + * inverse transform for chroma 4x4 blk + */ + FT_IQ_IT_RECON *apf_iquant_itrans_recon_chroma_4x4[NUM_IQ_IT_RECON_VARIANTS]; + + /** + * inverse transform for 4x4 blk with only single dc coeff + */ + FT_IQ_IT_RECON *apf_iquant_itrans_recon_4x4_dc[NUM_IQ_IT_RECON_VARIANTS]; + + /** + * inverse transform for chroma 4x4 blk with only single dc coeff + */ + FT_IQ_IT_RECON + *apf_iquant_itrans_recon_chroma_4x4_dc[NUM_IQ_IT_RECON_VARIANTS]; + + /* + * Inverse hadamard transform and iquant for a 4x4 block + */ + FT_IHADAMARD_SCALING *pf_ihadamard_scaling_4x4; + + /* + * Inverse hadamard transform and iquant for a 4x4 block + */ + FT_IHADAMARD_SCALING *pf_ihadamard_scaling_2x2_uv; + + /** + * forward transform for 8x8 blk + */ + FT_RESI_TRANS_QUANT *apf_resi_trans_quant_8x8[NUM_RESI_TRANS_QUANT_VARIANTS]; + + /** + * inverse transform for 8x8 blk + */ + FT_IQ_IT_RECON *apf_iquant_itrans_recon_8x8[NUM_IQ_IT_RECON_VARIANTS]; + + FT_IQ_IT_RECON *pf_zcbf_iquant_itrans_recon_4x4; + + FT_IQ_IT_RECON *pf_chroma_zcbf_iquant_itrans_recon_4x4; + +} enc_loop_fxns_t; + +typedef struct inter_pred_fxns_t +{ + FT_INTER_PRED_LUMA *pf_inter_pred_luma_copy; + + FT_INTER_PRED_LUMA *pf_inter_pred_luma_horz; + + FT_INTER_PRED_LUMA *pf_inter_pred_luma_vert; + + FT_INTER_PRED_LUMA_BILINEAR *pf_inter_pred_luma_bilinear; + + FT_INTER_PRED_CHROMA *pf_inter_pred_chroma; +} inter_pred_fxns_t; + +typedef struct mem_fxns_t +{ + FT_MEMCPY *pf_mem_cpy; + + FT_MEMSET *pf_mem_set; + + FT_MEMCPY *pf_mem_cpy_mul8; + + FT_MEMSET *pf_mem_set_mul8; + + FT_COPY_2D *pf_copy_2d; + 
+ FT_MEMSET_2D *pf_memset_2d; + + FT_16BIT_INTERLEAVED_COPY *pf_16bit_interleaved_copy; + + FT_16BIT_INTERLEAVED_MEMSET *pf_16bit_interleaved_memset; + + FT_NONZERO_CHECKER *pf_nonzero_checker; + +} mem_fxns_t; + +typedef struct isa_dependent_fxns_t +{ + enc_loop_fxns_t s_enc_loop_fxns; + + inter_pred_fxns_t s_inter_pred_fxns; + + mem_fxns_t s_mem_fxns; +} isa_dependent_fxns_t; + +/** + * Reference set containing pointers to MV buf and pic buf + */ +typedef struct +{ + /** Picture count */ + WORD32 i4_pic_cnt; + + /** POC */ + WORD32 i4_poc; + + /** picture buffer */ + svc_au_buf_t *ps_pic_buf; + + /** mv buffer */ + svc_au_data_t *ps_svc_au_data; + +} isvce_ref_set_t; + +typedef struct isvce_codec_t +{ + /** + * downscaler context + */ + downscaler_ctxt_t s_scaler; + + svc_ilp_data_t s_svc_ilp_data; + + nalu_descriptors_t as_nalu_descriptors[MAX_NUM_SPATIAL_LAYERS]; + + isa_dependent_fxns_t s_isa_dependent_fxns; + +#if ENABLE_MODE_STAT_VISUALISER + /* Resolves circular dependency with mode_stat_visualiser_t */ + void *ps_mode_stat_visualiser; +#endif + + /** enable constrained intra prediction */ + UWORD32 au4_constrained_intra_pred[MAX_NUM_SPATIAL_LAYERS]; + + /** + * Id of current pic (input order) + */ + WORD32 i4_poc; + + /** + * Number of encode frame API calls made + * This variable must only be used for context selection [Read only] + */ + WORD32 i4_encode_api_call_cnt; + + /** + * Number of pictures encoded + */ + WORD32 i4_pic_cnt; + + /** + * Number of threads created + */ + WORD32 i4_proc_thread_cnt; + + /** + * Mutex used to keep the control calls thread-safe + */ + void *pv_ctl_mutex; + + /** + * Current active config parameters + */ + isvce_cfg_params_t s_cfg; + + /** + * Array containing the config parameter sets + */ + isvce_cfg_params_t as_cfg[MAX_ACTIVE_CONFIG_PARAMS]; + + /** + * Color format used by encoder internally + */ + IV_COLOR_FORMAT_T e_codec_color_format; + + /** + * recon stride + * (strides for luma and chroma are the same) + */ + 
WORD32 i4_rec_strd; + + /** + * Flag to enable/disable deblocking of a frame + */ + WORD32 u4_disable_deblock_level; + + /** + * Number of continuous frames where deblocking was disabled + */ + WORD32 u4_disable_deblock_level_cnt; + + /** + * frame type + */ + PIC_TYPE_T pic_type; + + /** + * frame qp + */ + UWORD32 au4_frame_qp[MAX_NUM_SPATIAL_LAYERS]; + + /** + * Enable inital QP calculation based on BPP and GPP + */ + UWORD8 u1_enable_init_qp; + + /** + * frame num + */ + WORD32 i4_frame_num; + + /** + * slice_type + */ + WORD32 i4_slice_type; + + /* + * Force current frame to specific type + */ + IV_PICTURE_CODING_TYPE_T force_curr_frame_type; + + /** + * IDR pic + */ + UWORD32 u4_is_idr; + + /** + * idr_pic_id + */ + WORD32 i4_idr_pic_id; + + /** + * Flush mode + */ + WORD32 i4_flush_mode; + + /** + * Encode header mode + */ + WORD32 i4_header_mode; + + /** + * Flag to indicate if header has already + * been generated when i4_api_call_cnt 0 + */ + UWORD32 u4_header_generated; + + /** + * Encode generate header + */ + WORD32 i4_gen_header; + + /** + * To signal successful completion of init + */ + WORD32 i4_init_done; + + /** + * To signal that at least one picture was decoded + */ + WORD32 i4_first_pic_done; + + /** + * Reset flag - Codec is reset if this flag is set + */ + WORD32 i4_reset_flag; + + /** + * Current error code + */ + WORD32 i4_error_code; + + /** + * threshold residue + */ + WORD32 u4_thres_resi; + + /** + * disable intra inter gating + */ + UWORD32 u4_inter_gate; + + /** + * Holds mem records passed during init. 
+ * This will be used to return the mem records during retrieve call + */ + iv_mem_rec_t *ps_mem_rec_backup; + + /** + * Flag to determine if the entropy thread is active + */ + volatile UWORD32 au4_entropy_thread_active[MAX_CTXT_SETS]; + + /** + * Mutex used to keep the entropy calls thread-safe + */ + void *pv_entropy_mutex; + + /** + * Job queue buffer base + */ + void *pv_proc_jobq_buf, *pv_entropy_jobq_buf; + + /** + * Job Queue mem tab size + */ + WORD32 i4_proc_jobq_buf_size, i4_entropy_jobq_buf_size; + + /** + * Memory for svc_au_data buffer manager + */ + void *pv_svc_au_data_store_mgr_base; + + /** + * svc_au_data buffer manager + */ + void *pv_svc_au_data_store_mgr; + + /** + * Pointer to svc_au_data structure array + */ + svc_au_data_t *ps_svc_au_data; + + /** + * Base address for svc_au_data + */ + svc_au_data_t *ps_svc_au_data_base; + + /** + * svc_au_data size + */ + WORD32 i4_svc_au_data_size; + + /** + * Memory for Picture buffer manager for reference pictures + */ + void *pv_ref_buf_mgr_base; + + /** + * Picture buffer manager for reference pictures + */ + void *pv_ref_buf_mgr; + + /** + * Number of reference buffers added to the buffer manager + */ + WORD32 i4_ref_buf_cnt; + + /** + * Pointer to Pic Buf structure array + */ + svc_au_buf_t *ps_pic_buf; + + /** + * Base address for Picture buffer + */ + svc_au_buf_t *ps_pic_buf_base; + + /** + * Total pic buffer size allocated + */ + WORD32 i4_total_pic_buf_size; + + /** + * Memory for Buffer manager for output buffers + */ + void *pv_out_buf_mgr_base; + + /** + * Buffer manager for output buffers + */ + void *pv_out_buf_mgr; + + /** + * Current output buffer's buffer ID + */ + WORD32 i4_out_buf_id; + + /** + * Number of output buffers added to the buffer manager + */ + WORD32 i4_out_buf_cnt; + + /** + * Memory for Picture buffer manager for input buffers + */ + void *pv_inp_buf_mgr_base; + + /** + * Picture buffer manager for input buffers + */ + void *pv_inp_buf_mgr; + + /** + * Current input 
buffer's buffer ID + */ + WORD32 i4_inp_buf_id; + + /** + * Number of input buffers added to the buffer manager + */ + WORD32 i4_inp_buf_cnt; + + /** + * Pointer to dpb manager structure + */ + void *pv_dpb_mgr; + + /** + * Pointer to base of Sequence parameter set structure array + */ + sps_t *ps_sps_base; + + /** + * Pointer to base of Picture parameter set structure array + */ + pps_t *ps_pps_base; + + /** + * Pointer to base of svc_nalu_ext structure array + */ + svc_nalu_ext_t *ps_svc_nalu_ext_base; + + /** + * Pointer to base of subset sequence parameter set structure array + */ + subset_sps_t *ps_subset_sps_base; + + /** + * Pointer to base of slice header structure array + */ + slice_header_t *ps_slice_hdr_base; + + /** + * Pointer to base of SVC slice header structure array + */ + svc_slice_header_t *ps_svc_slice_hdr_base; + + /** + * packed residue coeff data size for 1 row of mbs + */ + UWORD32 u4_size_coeff_data; + + /** + * packed header data size for 1 row of mbs + */ + UWORD32 u4_size_header_data; + + /** + * Processing context - One for each processing thread + * Create two sets, each set used for alternate frames + */ + isvce_process_ctxt_t as_process[MAX_PROCESS_CTXT]; + + /** + * Thread handle for each of the processing threads + */ + void *apv_proc_thread_handle[MAX_PROCESS_THREADS]; + + /** + * Thread created flag for each of the processing threads + */ + WORD32 ai4_process_thread_created[MAX_PROCESS_THREADS]; + + /** + * Void pointer to process job context + */ + void *pv_proc_jobq, *pv_entropy_jobq; + + /** + * Number of MBs processed together for better instruction cache handling + */ + WORD32 i4_proc_nmb; + + /** + * Previous POC lsb + */ + WORD32 i4_prev_poc_lsb; + + /** + * Previous POC msb + */ + WORD32 i4_prev_poc_msb; + + /** + * Max POC lsb that has arrived till now + */ + WORD32 i4_max_prev_poc_lsb; + + /** + * Context for format conversion + */ + fmt_conv_t s_fmt_conv; + + /** + * Absolute pic order count + */ + WORD32 
i4_abs_pic_order_cnt; + + /** + * Pic order count of lsb + */ + WORD32 i4_pic_order_cnt_lsb; + + /** + * Array giving current picture being processed in each context set + */ + WORD32 ai4_pic_cnt[MAX_CTXT_SETS]; + + /* + * Min sad to search for + */ + UWORD32 u4_min_sad; + + /** + * Reference picture set + */ + isvce_ref_set_t as_ref_set[MAX_DPB_SIZE + MAX_CTXT_SETS]; + + /* + * Air pic cnt + * Contains the number of pictures that have been encoded with air + * This value is modulo air refresh period + */ + WORD32 i4_air_pic_cnt; + + /* + * Intra refresh map + * Stores the frames at which intra refresh should occur for a MB + */ + UWORD16 *pu2_intr_rfrsh_map; + + /* + * Indicates if the current frame is used as a reference frame + */ + UWORD32 u4_is_curr_frm_ref; + + /* + * Indicates if there can be non reference frames in the stream + */ + WORD32 i4_non_ref_frames_in_stream; + + /* + * Memory for color space conversion for luma plane + */ + UWORD8 *pu1_y_csc_buf_base; + + /* + * Memory for color space conversion for chroma plane + */ + UWORD8 *pu1_uv_csc_buf_base; + + /** + * Function pointers for intra pred leaf level functions luma + */ + pf_intra_pred apf_intra_pred_16_l[MAX_I16x16]; + pf_intra_pred apf_intra_pred_8_l[MAX_I8x8]; + pf_intra_pred apf_intra_pred_4_l[MAX_I4x4]; + + /** + * Function pointers for intra pred leaf level functions chroma + */ + pf_intra_pred apf_intra_pred_c[MAX_CH_I8x8]; + + /** + * deblock vertical luma edge with blocking strength 4 + */ + ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4; + + /** + * deblock vertical chroma edge with blocking strength 4 + */ + ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4; + + /** + * deblock vertical luma edge with blocking strength less than 4 + */ + ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4; + + /** + * deblock vertical chroma edge with blocking strength less than 4 + */ + ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4; + + /** + * deblock horizontal luma edge
with blocking strength 4 + */ + ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4; + + /** + * deblock horizontal chroma edge with blocking strength 4 + */ + ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4; + + /** + * deblock horizontal luma edge with blocking strength less than 4 + */ + ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4; + + /** + * deblock horizontal chroma edge with blocking strength less than 4 + */ + ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4; + + /** + * functions for padding + */ + pf_pad pf_pad_top; + pf_pad pf_pad_bottom; + pf_pad pf_pad_left_luma; + pf_pad pf_pad_left_chroma; + pf_pad pf_pad_right_luma; + pf_pad pf_pad_right_chroma; + + /** + * fn ptrs for compute sad routines + */ + ime_compute_sad_ft *apf_compute_sad_16x16[2]; + ime_compute_sad_ft *pf_compute_sad_16x8; + + /** + * Function pointer for computing ME + * 1 for PSLICE and 1 for BSLICE + */ + FT_ME_ALGORITHM *apf_compute_me[2]; + + /** + * Function pointers for computing SKIP parameters + */ + FT_FIND_SKIP_PARAMS *apf_find_skip_params_me[2]; + + /** + * intra mode eval -encoder level function + */ + pf_evaluate_intra_modes pf_ih264e_evaluate_intra16x16_modes; + pf_evaluate_intra_modes pf_ih264e_evaluate_intra_chroma_modes; + pf_evaluate_intra_4x4_modes pf_ih264e_evaluate_intra_4x4_modes; + + /* Half pel generation function - encoder level + * + */ + pf_sixtapfilter_horz pf_ih264e_sixtapfilter_horz; + pf_sixtap_filter_2dvh_vert pf_ih264e_sixtap_filter_2dvh_vert; + + /** + * color space conversion from YUV 420P to YUV 420Sp + */ + pf_fmt_conv_420p_to_420sp pf_ih264e_conv_420p_to_420sp; + + /** + * color space conversion from YUV 420P to YUV 420Sp + */ + pf_fmt_conv_422ile_to_420sp pf_ih264e_fmt_conv_422i_to_420sp; + + /** + * write mb layer for a given slice I, P, B + */ + IH264E_ERROR_T (*pf_write_mb_syntax_layer[2][3])(isvce_entropy_ctxt_t *ps_ent_ctxt); + + /** + * Output buffer + */ + isvce_out_buf_t as_out_buf[MAX_CTXT_SETS]; + + /** + * recon 
buffer + */ + isvce_rec_buf_t as_rec_buf[MAX_CTXT_SETS]; + + /** + * rate control context + */ + isvce_rate_control_ctxt_t s_rate_control; + + /** + * input buffer queue + */ + isvce_inp_buf_t as_inp_list[SVC_MAX_NUM_INP_FRAMES]; + + /** + * Flag to indicate if any IDR requests are pending + */ + WORD32 i4_pending_idr_flag; + + /** + *Flag to indicate if we have recived the last input frame + */ + WORD32 i4_last_inp_buff_received; + + /* + * Max num reference frames to be signaled in SPS + */ + WORD32 i4_max_num_reference_frames; + + /** + * backup sei params for comparison + */ + sei_params_t s_sei; +} isvce_codec_t; + +#endif diff --git a/encoder/svc/isvce_sub_pic_rc.c b/encoder/svc/isvce_sub_pic_rc.c new file mode 100644 index 0000000..92a66b7 --- /dev/null +++ b/encoder/svc/isvce_sub_pic_rc.c @@ -0,0 +1,906 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_sub_pic_rc.c +* +* @brief +* Contains functions used in sub-pic RC +* +******************************************************************************* +*/ +#include +#include +#include +#include + +#include "ih264_typedefs.h" +#include "ih264_cavlc_tables.h" +#include "ih264_platform_macros.h" +#include "ithread.h" +#include "isvc_defs.h" +#include "isvc_structs.h" +#include "isvce_structs.h" +#include "isvce_defs.h" +#include "isvce_sub_pic_rc.h" +#include "isvce_sub_pic_rc_private_defs.h" + +/* Dependencies of 'irc_picture_type.h' */ +#include "irc_mem_req_and_acq.h" + +/* Dependencies of 'irc_rate_control_api_structs' */ +#include "irc_picture_type.h" +#include "irc_rd_model.h" +#include "irc_vbr_storage_vbv.h" +#include "irc_est_sad.h" +#include "irc_bit_allocation.h" +#include "irc_mb_model_based.h" +#include "irc_cbr_buffer_control.h" +#include "irc_vbr_str_prms.h" +#include "irc_common.h" + +#include "irc_rate_control_api_structs.h" +#include "irc_rate_control_api.h" + +/** +******************************************************************************* +* +* @brief +* Returns size of buffers for storing subPicRC ctxt +* +* @returns Size of buffers +* +******************************************************************************* +*/ +UWORD32 isvce_get_sub_pic_rc_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, + UWORD32 u4_wd, UWORD32 u4_ht) +{ + WORD32 i; + + UWORD32 u4_size = MAX_PROCESS_CTXT * sizeof(svc_sub_pic_rc_ctxt_t); + + u4_size += sizeof(sub_pic_rc_state_t); + u4_size += ithread_get_mutex_struct_size(); + + for(i = u1_num_spatial_layers - 1; i >= 0; i--) + { + WORD32 i4_layer_wd = + (WORD32) ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + + 0.99; + WORD32 i4_layer_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; + WORD32 i4_layer_mbs 
= (i4_layer_wd / MB_SIZE) * (i4_layer_ht / MB_SIZE); + + /* ps_mb_bits_info */ + u4_size += i4_layer_mbs * sizeof(mb_bits_info_t); + +#if DUMP_SUB_PIC_RC_DATA + /* ps_mb_bits_actual */ + u4_size += i4_layer_mbs * sizeof(mb_bits_info_t); +#endif + } + + return u4_size; +} + +void isvce_sub_pic_rc_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec) +{ + sub_pic_rc_state_t *ps_sub_pic_rc_state; + + WORD32 i, j; + + DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio; + UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; + UWORD32 u4_wd = ps_codec->s_cfg.u4_wd; + UWORD32 u4_ht = ps_codec->s_cfg.u4_ht; + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + WORD64 i8_alloc_mem_size = + isvce_get_sub_pic_rc_ctxt_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht); + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt = ps_codec->as_process[i].ps_sub_pic_rc_ctxt = + (svc_sub_pic_rc_ctxt_t *) pu1_buf; + + pu1_buf += sizeof(ps_sub_pic_rc_ctxt[0]); + i8_alloc_mem_size -= sizeof(ps_sub_pic_rc_ctxt[0]); + + if(0 == i) + { + ps_sub_pic_rc_ctxt->s_sub_pic_rc_constants.pv_state = ps_sub_pic_rc_state = + (sub_pic_rc_state_t *) pu1_buf; + pu1_buf += sizeof(ps_sub_pic_rc_state[0]); + i8_alloc_mem_size -= sizeof(ps_sub_pic_rc_state[0]); + + ASSERT(i8_alloc_mem_size >= 0); + ASSERT(NULL != ps_codec->s_rate_control.apps_rate_control_api); + ASSERT(NULL != ps_codec->as_process->s_me_ctxt.pu1_mv_bits); + + ps_sub_pic_rc_state->s_svc_params = ps_codec->s_cfg.s_svc_params; + ps_sub_pic_rc_state->pu1_uev_codeword_to_bits_map = gau1_uev_codeword_to_bits_map; + ps_sub_pic_rc_state->pu1_sev_codeword_to_bits_map = + ps_codec->as_process->s_me_ctxt.pu1_mv_bits; + ps_sub_pic_rc_state->e_rc_mode = ps_codec->s_cfg.e_rc_mode; + + ps_sub_pic_rc_state->pv_bits_accumulator_mutex = (void *) pu1_buf; + pu1_buf += ithread_get_mutex_struct_size(); + i8_alloc_mem_size -= ithread_get_mutex_struct_size(); + 
ithread_mutex_init(ps_sub_pic_rc_state->pv_bits_accumulator_mutex); + + for(j = u1_num_spatial_layers - 1; j >= 0; j--) + { + sub_pic_rc_layer_state_t *ps_layer_state = + &ps_sub_pic_rc_state->as_sub_pic_rc_layer_states[j]; + + WORD32 i4_layer_wd = + (WORD32) ((DOUBLE) u4_wd / + pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + + 0.99; + WORD32 i4_layer_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + + 0.99; + WORD32 i4_layer_mbs = (i4_layer_wd / MB_SIZE) * (i4_layer_ht / MB_SIZE); + + ps_layer_state->i4_wd = i4_layer_wd; + ps_layer_state->i4_ht = i4_layer_ht; + ps_layer_state->i4_num_mbs = i4_layer_mbs; + ps_layer_state->pv_layer_rc_ctxt = + ps_codec->s_rate_control.apps_rate_control_api[j]; + ps_layer_state->ps_mb_bits_info = (mb_bits_info_t *) pu1_buf; + pu1_buf += i4_layer_mbs * sizeof(ps_layer_state->ps_mb_bits_info[0]); + i8_alloc_mem_size -= i4_layer_mbs * sizeof(ps_layer_state->ps_mb_bits_info[0]); + + ASSERT(i8_alloc_mem_size >= 0); + +#if DUMP_SUB_PIC_RC_DATA + ps_layer_state->ps_mb_bits_actual = (mb_bits_info_t *) pu1_buf; + pu1_buf += i4_layer_mbs * sizeof(ps_layer_state->ps_mb_bits_actual[0]); + i8_alloc_mem_size -= i4_layer_mbs * sizeof(ps_layer_state->ps_mb_bits_actual[0]); + + ASSERT(i8_alloc_mem_size >= 0); + + { + UWORD8 au1_file_path[MAX_SUB_PIC_RC_DUMP_FILE_PATH_LENGTH + 1]; + + sprintf((WORD8 *) au1_file_path, "%ssubPicRC%1d.txt", SUB_PIC_RC_DUMP_FILE_PATH, + j); + + ps_layer_state->ps_data_dump_file = fopen(au1_file_path, "w"); + + ASSERT(NULL != ps_layer_state->ps_data_dump_file); + } +#endif + } + } + else + { + svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt_src = + ps_codec->as_process[0].ps_sub_pic_rc_ctxt; + svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt_dst = + ps_codec->as_process[i].ps_sub_pic_rc_ctxt; + sub_pic_rc_state_t *ps_proc0_state = + (sub_pic_rc_state_t *) ps_sub_pic_rc_ctxt_src->s_sub_pic_rc_constants.pv_state; + + ps_sub_pic_rc_ctxt_dst->s_sub_pic_rc_constants.pv_state = ps_proc0_state; + } 
+ } +} + +static FORCEINLINE void isvce_sub_pic_rc_qp_params_init(sub_pic_rc_qp_params_t *ps_qp_params, + UWORD8 u1_min_qp, UWORD8 u1_max_qp) +{ + ps_qp_params->u1_min_qp = u1_min_qp; + ps_qp_params->u1_max_qp = u1_max_qp; + ps_qp_params->pu4_qp_to_qscale_map = gau4_qp_to_qscale_map; + ps_qp_params->pu1_qscale_to_qp_map = gau1_qscale_to_qp_map; +} + +void isvce_sub_pic_rc_ctxt_layer_init(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt) +{ + sub_pic_rc_layer_state_t *ps_layer_state; + + svc_sub_pic_rc_constants_t *ps_sub_pic_rc_constants = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_constants; + svc_sub_pic_rc_variables_t *ps_sub_pic_rc_variables = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_variables; + sub_pic_rc_state_t *ps_sub_pic_rc_state = + (sub_pic_rc_state_t *) ps_sub_pic_rc_constants->pv_state; + + UWORD8 u1_spatial_layer_id = ps_sub_pic_rc_variables->s_layer_variables.u1_spatial_layer_id; + + ps_layer_state = &ps_sub_pic_rc_state->as_sub_pic_rc_layer_states[u1_spatial_layer_id]; + + memset(&ps_layer_state->s_cumulative_mb_bits, 0, sizeof(ps_layer_state->s_cumulative_mb_bits)); + ps_layer_state->u4_num_mbs_sampled = 0; + + /* Frames with frameNum=0 are usually IDR's. RC model will be reset for IDR's. 
+ */ + /* Hence, using VBVBufSize as a proxy for estimated bits */ + if(0 == ps_sub_pic_rc_variables->s_layer_variables.i4_frame_num) + { + ps_layer_state->u4_allocated_bits = + irc_get_vbv_buf_size(ps_layer_state->pv_layer_rc_ctxt) / 10.; + } + else + { + ps_layer_state->u4_allocated_bits = + irc_get_prev_frm_est_bits(ps_layer_state->pv_layer_rc_ctxt); + } + + isvce_sub_pic_rc_qp_params_init(&ps_layer_state->s_qp_params, + ps_sub_pic_rc_variables->s_layer_variables.u1_min_qp, + ps_sub_pic_rc_variables->s_layer_variables.u1_max_qp); +} + +static FORCEINLINE UWORD32 isvce_sub_pic_rc_get_res_pred_flag_bits( + svc_sub_pic_rc_variables_t *ps_sub_pic_rc_variables, sub_pic_rc_state_t *ps_sub_pic_rc_state) +{ + isvce_mb_info_t *ps_mb_info = ps_sub_pic_rc_variables->s_mb_variables.ps_mb_info; + + UNUSED(ps_sub_pic_rc_state); + + return (ENABLE_RESIDUAL_PREDICTION && !ps_mb_info->u1_is_intra); +} + +static FORCEINLINE UWORD32 isvce_sub_pic_rc_get_cbp_bits( + svc_sub_pic_rc_variables_t *ps_sub_pic_rc_variables, sub_pic_rc_state_t *ps_sub_pic_rc_state) +{ + isvce_mb_info_t *ps_mb_info = ps_sub_pic_rc_variables->s_mb_variables.ps_mb_info; + + UWORD32 u4_cbp = ps_sub_pic_rc_variables->s_mb_variables.u4_cbp; + bool b_use_inter_cbp_map = !ps_mb_info->u1_is_intra || ps_mb_info->u1_base_mode_flag; + + return ps_sub_pic_rc_state + ->pu1_uev_codeword_to_bits_map[gu1_cbp_map_tables[u4_cbp][b_use_inter_cbp_map]]; +} + +static FORCEINLINE UWORD32 isvce_sub_pic_rc_get_mb_type_bits( + svc_sub_pic_rc_variables_t *ps_sub_pic_rc_variables, sub_pic_rc_state_t *ps_sub_pic_rc_state) +{ + UWORD32 u4_mb_type; + + isvce_mb_info_t *ps_mb_info = ps_sub_pic_rc_variables->s_mb_variables.ps_mb_info; + + UWORD32 u4_cbp = ps_sub_pic_rc_variables->s_mb_variables.u4_cbp; + UWORD32 au4_cbps[NUM_SP_COMPONENTS] = {u4_cbp & 15, u4_cbp >> 4}; + + switch(ps_mb_info->u2_mb_type) + { + case I16x16: + { + u4_mb_type = ps_mb_info->s_intra_pu.s_i16x16_mode_data.u1_mode + 1 + + (au4_cbps[UV] << 2) + (au4_cbps[Y] == 
15) * 12; + + break; + } + case I4x4: + { + u4_mb_type = 5 * (ps_sub_pic_rc_variables->s_layer_variables.i4_slice_type != ISLICE); + + break; + } + case P16x16: + { + u4_mb_type = 0; + + break; + } + default: + { + return 0; + } + } + + return ps_sub_pic_rc_state->pu1_uev_codeword_to_bits_map[u4_mb_type]; +} + +static FORCEINLINE UWORD32 isvce_sub_pic_rc_get_mb_pred_bits( + svc_sub_pic_rc_variables_t *ps_sub_pic_rc_variables, sub_pic_rc_state_t *ps_sub_pic_rc_state) +{ + WORD32 i; + + isvce_mb_info_t *ps_mb_info = ps_sub_pic_rc_variables->s_mb_variables.ps_mb_info; + + UWORD32 u4_bits = 0; + + switch(ps_mb_info->u2_mb_type) + { + case I16x16: + { + /* intra_chroma_pred_mode */ + u4_bits += + ps_sub_pic_rc_state + ->pu1_uev_codeword_to_bits_map[ps_mb_info->s_intra_pu.u1_chroma_intra_mode]; + + break; + } + case I4x4: + { + intra4x4_mode_data_t *ps_i4x4_mode_data = ps_mb_info->s_intra_pu.as_i4x4_mode_data; + + for(i = 0; i < MAX_TU_IN_MB; i++) + { + /* prev_intra4x4_pred_mode_flag */ + u4_bits += 1; + + /* rem_intra4x4_pred_mode */ + u4_bits += + 3 * (ps_i4x4_mode_data[i].u1_mode != ps_i4x4_mode_data[i].u1_predicted_mode); + } + + /* intra_chroma_pred_mode */ + u4_bits += + ps_sub_pic_rc_state + ->pu1_uev_codeword_to_bits_map[ps_mb_info->s_intra_pu.u1_chroma_intra_mode]; + + break; + } + case P16x16: + { + mv_t s_mvd; + + /* motion_prediction_flag_l0 */ + u4_bits += USE_ILP_MV_AS_MVP; + + /* ref_idx_l0 */ + if(2 == ps_sub_pic_rc_variables->s_layer_variables.i4_max_num_reference_frames) + { + u4_bits += 1; + } + else if(2 < ps_sub_pic_rc_variables->s_layer_variables.i4_max_num_reference_frames) + { + u4_bits += ps_sub_pic_rc_state->pu1_uev_codeword_to_bits_map + [ps_mb_info->as_pu->as_me_info[L0].i1_ref_idx]; + } + + /* mvd_l0 */ + s_mvd.i2_mvx = ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx - + ps_sub_pic_rc_variables->s_mb_variables + .aps_mvps[ps_mb_info->as_pu->au1_mvp_idx[L0]] + ->s_mv.i2_mvx; + s_mvd.i2_mvy = ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy - + 
ps_sub_pic_rc_variables->s_mb_variables + .aps_mvps[ps_mb_info->as_pu->au1_mvp_idx[L0]] + ->s_mv.i2_mvy; + u4_bits += ps_sub_pic_rc_state->pu1_sev_codeword_to_bits_map[s_mvd.i2_mvx]; + u4_bits += ps_sub_pic_rc_state->pu1_sev_codeword_to_bits_map[s_mvd.i2_mvy]; + + break; + } + default: + { + break; + } + } + + return u4_bits; +} + +static void ihevce_svc_sub_pic_rc_set_header_bits(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt) +{ + sub_pic_rc_layer_state_t *ps_layer_state; + mb_bits_info_t *ps_mb_bits_info; + + UWORD32 u4_mb_idx; + + svc_sub_pic_rc_constants_t *ps_sub_pic_rc_constants = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_constants; + svc_sub_pic_rc_variables_t *ps_sub_pic_rc_variables = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_variables; + sub_pic_rc_state_t *ps_sub_pic_rc_state = + (sub_pic_rc_state_t *) ps_sub_pic_rc_constants->pv_state; + isvce_mb_info_t *ps_mb_info = ps_sub_pic_rc_variables->s_mb_variables.ps_mb_info; + + UWORD8 u1_spatial_layer_id = ps_sub_pic_rc_variables->s_layer_variables.u1_spatial_layer_id; + + ps_layer_state = &ps_sub_pic_rc_state->as_sub_pic_rc_layer_states[u1_spatial_layer_id]; + u4_mb_idx = ps_sub_pic_rc_variables->s_mb_variables.s_mb_pos.i4_abscissa + + ps_sub_pic_rc_variables->s_mb_variables.s_mb_pos.i4_ordinate * + (ps_layer_state->i4_wd / MB_SIZE); + ps_mb_bits_info = &ps_layer_state->ps_mb_bits_info[u4_mb_idx]; + + /* Hypotheses used for header bits estimation - */ + /* 1. mb_skip_run, base_mode_flag, mb_type, mb_pred, residual_prediction_flag, + * and cbp */ + /* are considered as contibuting to header bits. */ + /* 2. mb_skip_run = 1 bit */ + /* 3. base_mode_flag = 1 bit */ + /* 4. mb_type = LUT mapping mbType to corresponding ue(v) */ + /* 5. mb_pred.I4x4 = 1 bit for 16 'prev_intra4x4_pred_mode_flag'; */ + /* 3 bits for each explicitly signaled + * 'rem_intra4x4_pred_mode' */ + /* 6. 
mb_pred.Inter = 1 bit for 'motion_prediction_flag_l0' and + * 'motion_prediction_flag_l1', when necessary; */ + /* mvbits LUT for 'mvd_l0' and 'mvd_l1' */ + /* 7. mb_pred.intra_chroma_pred_mode = LUT mapping intra_chroma_pred_mode to + * corresponding ue(v) */ + /* 8. residual_prediction_flag = 1 bit */ + /* 9. coded_block_pattern = LUT mapping mbType to corresponding me(v) */ + + /* mb_skip_run is assumed to be either 0 or 1 */ + ps_mb_bits_info->i8_header_bits += 1; + + /* 'base_mode_flag' */ + if((ENABLE_ILP_MV || ENABLE_IBL_MODE) && u1_spatial_layer_id) + { + ps_mb_bits_info->i8_header_bits += 1; + + if(ps_mb_info->u1_base_mode_flag) + { + /* 'residual_prediction_flag' */ + ps_mb_bits_info->i8_header_bits += isvce_sub_pic_rc_get_res_pred_flag_bits( + ps_sub_pic_rc_variables, ps_sub_pic_rc_state); + + /* 'coded_block_pattern' */ + ps_mb_bits_info->i8_header_bits += + isvce_sub_pic_rc_get_cbp_bits(ps_sub_pic_rc_variables, ps_sub_pic_rc_state); + + return; + } + } + + /* 'mb_type' */ + ps_mb_bits_info->i8_header_bits += + isvce_sub_pic_rc_get_mb_type_bits(ps_sub_pic_rc_variables, ps_sub_pic_rc_state); + + if(PSKIP == ps_mb_info->u2_mb_type) + { + return; + } + + /* 'mb_pred' */ + ps_mb_bits_info->i8_header_bits += + isvce_sub_pic_rc_get_mb_pred_bits(ps_sub_pic_rc_variables, ps_sub_pic_rc_state); + + /* 'residual_prediction_flag' */ + ps_mb_bits_info->i8_header_bits += + isvce_sub_pic_rc_get_res_pred_flag_bits(ps_sub_pic_rc_variables, ps_sub_pic_rc_state); +} + +static FORCEINLINE UWORD32 isvce_sub_pic_rc_get_tu_residual_bits( + svc_sub_pic_rc_variables_t *ps_sub_pic_rc_variables, WORD32 i4_coeff_start_idx, + UWORD8 u1_num_coded_coeffs, UWORD8 u1_num_coeffs, bool b_is_chroma) +{ + WORD32 i; + UWORD32 u4_num_bits; + + UWORD32 u4_bits = 0; + WORD16 *pi2_coeff = + ((WORD16 *) ps_sub_pic_rc_variables->s_mb_variables.as_quant_coeffs[b_is_chroma ? 
UV : Y] + .pv_data) + + i4_coeff_start_idx; + + if(0 == u1_num_coded_coeffs) + { + return 0; + } + + GETRANGE(u4_num_bits, u1_num_coded_coeffs); + u4_bits += u4_num_bits; + + for(i = 0; i < u1_num_coeffs; i++) + { + if(pi2_coeff[i]) + { + GETRANGE(u4_num_bits, pi2_coeff[i]); + u4_bits += u4_num_bits; + } + } + return u4_bits; +} + +static void ihevce_svc_sub_pic_rc_set_texture_bits(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt) +{ + sub_pic_rc_layer_state_t *ps_layer_state; + mb_bits_info_t *ps_mb_bits_info; + + UWORD32 u4_mb_idx; + WORD32 i, j; + + svc_sub_pic_rc_constants_t *ps_sub_pic_rc_constants = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_constants; + svc_sub_pic_rc_variables_t *ps_sub_pic_rc_variables = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_variables; + sub_pic_rc_state_t *ps_sub_pic_rc_state = + (sub_pic_rc_state_t *) ps_sub_pic_rc_constants->pv_state; + isvce_mb_info_t *ps_mb_info = ps_sub_pic_rc_variables->s_mb_variables.ps_mb_info; + + UWORD8 u1_spatial_layer_id = ps_sub_pic_rc_variables->s_layer_variables.u1_spatial_layer_id; + UWORD32 au4_cbps[NUM_SP_COMPONENTS] = {ps_sub_pic_rc_variables->s_mb_variables.u4_cbp & 15, + ps_sub_pic_rc_variables->s_mb_variables.u4_cbp >> 4}; + + if(0 == ps_sub_pic_rc_variables->s_mb_variables.u4_cbp) + { + return; + } + + if(MIN_TU_SIZE != ps_mb_info->u1_tx_size) + { + return; + } + + ps_layer_state = &ps_sub_pic_rc_state->as_sub_pic_rc_layer_states[u1_spatial_layer_id]; + u4_mb_idx = ps_sub_pic_rc_variables->s_mb_variables.s_mb_pos.i4_abscissa + + ps_sub_pic_rc_variables->s_mb_variables.s_mb_pos.i4_ordinate * + (ps_layer_state->i4_wd / MB_SIZE); + ps_mb_bits_info = &ps_layer_state->ps_mb_bits_info[u4_mb_idx]; + + /* Hypotheses used for texture bits estimation - */ + /* 1. Only level information is considered. */ + /* 2. nnz is used as a proxy for coeff_token. */ + /* 3. Both of the above are assumed coded via i(n). 
*/ + if(au4_cbps[Y]) + { + /* Y - DC */ + if(I16x16 == ps_mb_info->u2_mb_type) + { + ps_mb_bits_info->i8_texture_bits += isvce_sub_pic_rc_get_tu_residual_bits( + ps_sub_pic_rc_variables, 0, ps_sub_pic_rc_variables->s_mb_variables.apu1_nnzs[Y][0], + NUM_COEFFS_IN_MIN_TU, false); + } + + for(i = 0; i < MIN_TU_IN_MB; i++) + { + if(au4_cbps[Y] & (1 << i)) + { + UWORD32 u4_csbp = (ps_mb_info->u4_csbp >> (4 * i)) & 15; + + for(j = 0; j < NUM_4x4_IN_8x8; j++) + { + if(u4_csbp & (1 << j)) + { + /* 1 added to account for DC TU */ + UWORD8 u1_blk_id = 1 + gau4_tu_zscan_id_to_rasterscan_id_map[i][j]; + UWORD8 u1_nnz = + ps_sub_pic_rc_variables->s_mb_variables.apu1_nnzs[Y][u1_blk_id]; + + if(u1_nnz && (I16x16 == ps_mb_info->u2_mb_type)) + { + u1_nnz -= !!(((WORD16 *) (ps_sub_pic_rc_variables->s_mb_variables + .as_quant_coeffs[Y] + .pv_data))[u1_blk_id - 1]); + + ps_mb_bits_info->i8_texture_bits += + isvce_sub_pic_rc_get_tu_residual_bits( + ps_sub_pic_rc_variables, + u1_blk_id * ps_sub_pic_rc_variables->s_mb_variables + .as_quant_coeffs[Y] + .i4_data_stride + + (I16x16 == ps_mb_info->u2_mb_type), + u1_nnz, + NUM_COEFFS_IN_MIN_TU - (I16x16 == ps_mb_info->u2_mb_type), + false); + } + } + } + } + } + } + + if(au4_cbps[UV]) + { + for(i = ((WORD32) U); i <= ((WORD32) V); i++) + { + bool b_is_v = (i == ((WORD32) V)); + + ps_mb_bits_info->i8_texture_bits += isvce_sub_pic_rc_get_tu_residual_bits( + ps_sub_pic_rc_variables, b_is_v * NUM_4x4_IN_8x8, + ps_sub_pic_rc_variables->s_mb_variables + .apu1_nnzs[UV][0 + b_is_v * (1 + NUM_4x4_IN_8x8)], + NUM_4x4_IN_8x8, true); + + for(j = 0; j < NUM_4x4_IN_8x8; j++) + { + UWORD8 u1_nnz = ps_sub_pic_rc_variables->s_mb_variables + .apu1_nnzs[UV][j + b_is_v * (1 + NUM_4x4_IN_8x8) + 1]; + + if(u1_nnz) + { + u1_nnz -= + !!(((WORD16 *) (ps_sub_pic_rc_variables->s_mb_variables.as_quant_coeffs[UV] + .pv_data))[j + b_is_v * NUM_4x4_IN_8x8]); + + ps_mb_bits_info->i8_texture_bits += isvce_sub_pic_rc_get_tu_residual_bits( + ps_sub_pic_rc_variables, + (j + 
b_is_v * NUM_4x4_IN_8x8 + 1) * + ps_sub_pic_rc_variables->s_mb_variables.as_quant_coeffs[UV] + .i4_data_stride + + 1, + u1_nnz, NUM_COEFFS_IN_MIN_TU - 1, true); + } + } + } + } +} + +void isvce_sub_pic_rc_ctxt_update(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt) +{ + sub_pic_rc_layer_state_t *ps_layer_state; + mb_bits_info_t *ps_mb_bits_info; + + UWORD32 u4_mb_idx; + + svc_sub_pic_rc_constants_t *ps_sub_pic_rc_constants = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_constants; + svc_sub_pic_rc_variables_t *ps_sub_pic_rc_variables = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_variables; + sub_pic_rc_state_t *ps_sub_pic_rc_state = + (sub_pic_rc_state_t *) ps_sub_pic_rc_constants->pv_state; + isvce_mb_info_t *ps_mb_info = ps_sub_pic_rc_variables->s_mb_variables.ps_mb_info; + + UWORD8 u1_spatial_layer_id = ps_sub_pic_rc_variables->s_layer_variables.u1_spatial_layer_id; + bool b_is_skip_mb = (PSKIP == ps_mb_info->u2_mb_type) || (BSKIP == ps_mb_info->u2_mb_type); + + if(!ENABLE_IN_FRAME_RC || (IVE_RC_NONE == ps_sub_pic_rc_state->e_rc_mode)) + { + return; + } + + ps_layer_state = &ps_sub_pic_rc_state->as_sub_pic_rc_layer_states[u1_spatial_layer_id]; + u4_mb_idx = ps_sub_pic_rc_variables->s_mb_variables.s_mb_pos.i4_abscissa + + ps_sub_pic_rc_variables->s_mb_variables.s_mb_pos.i4_ordinate * + (ps_layer_state->i4_wd / MB_SIZE); + ps_mb_bits_info = &ps_layer_state->ps_mb_bits_info[u4_mb_idx]; + + memset(ps_mb_bits_info, 0, sizeof(ps_mb_bits_info[0])); + + if(!b_is_skip_mb) + { + ihevce_svc_sub_pic_rc_set_header_bits(ps_sub_pic_rc_ctxt); + + ihevce_svc_sub_pic_rc_set_texture_bits(ps_sub_pic_rc_ctxt); + } + + ithread_mutex_lock(ps_sub_pic_rc_state->pv_bits_accumulator_mutex); + + ps_layer_state->s_cumulative_mb_bits.i8_header_bits += ps_mb_bits_info->i8_header_bits; + ps_layer_state->s_cumulative_mb_bits.i8_texture_bits += ps_mb_bits_info->i8_texture_bits; + ps_layer_state->u4_num_mbs_sampled++; + + ithread_mutex_unlock(ps_sub_pic_rc_state->pv_bits_accumulator_mutex); +} + +UWORD8 
isvce_sub_pic_rc_get_mb_qp(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt, UWORD8 u1_cur_mb_qp) +{ + sub_pic_rc_layer_state_t *ps_layer_state; + + DOUBLE d_bit_consumption_ratio; + UWORD32 u4_frame_qscale; + UWORD8 u1_mb_qp; + UWORD32 u4_num_mbs_sampled; + WORD32 i4_cumulative_mb_bits; + + svc_sub_pic_rc_constants_t *ps_sub_pic_rc_constants = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_constants; + svc_sub_pic_rc_variables_t *ps_sub_pic_rc_variables = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_variables; + sub_pic_rc_state_t *ps_sub_pic_rc_state = + (sub_pic_rc_state_t *) ps_sub_pic_rc_constants->pv_state; + + UWORD8 u1_spatial_layer_id = ps_sub_pic_rc_variables->s_layer_variables.u1_spatial_layer_id; + UWORD8 u1_frame_qp = ps_sub_pic_rc_variables->s_layer_variables.u1_frame_qp; + + if(!ENABLE_IN_FRAME_RC || (IVE_RC_NONE == ps_sub_pic_rc_state->e_rc_mode)) + { + return u1_cur_mb_qp; + } + + ps_layer_state = &ps_sub_pic_rc_state->as_sub_pic_rc_layer_states[u1_spatial_layer_id]; + + ithread_mutex_lock(ps_sub_pic_rc_state->pv_bits_accumulator_mutex); + + u4_num_mbs_sampled = ps_layer_state->u4_num_mbs_sampled; + + if(u4_num_mbs_sampled < (MIN_SAMPLED_MB_RATIO * ps_layer_state->i4_num_mbs)) + { + ithread_mutex_unlock(ps_sub_pic_rc_state->pv_bits_accumulator_mutex); + + return u1_cur_mb_qp; + } + + i4_cumulative_mb_bits = (WORD32) (ps_layer_state->s_cumulative_mb_bits.i8_header_bits + + ps_layer_state->s_cumulative_mb_bits.i8_texture_bits); + + d_bit_consumption_ratio = + (((DOUBLE) i4_cumulative_mb_bits) * ((DOUBLE) ps_layer_state->i4_num_mbs)) / + (((DOUBLE) ps_layer_state->u4_allocated_bits) * ((DOUBLE) u4_num_mbs_sampled)); + + ithread_mutex_unlock(ps_sub_pic_rc_state->pv_bits_accumulator_mutex); + + if((d_bit_consumption_ratio > BIT_RATIO_FOR_OVERCONSUMPTION) || + (d_bit_consumption_ratio < BIT_RATIO_FOR_UNDERCONSUMPTION)) + { + u4_frame_qscale = ps_layer_state->s_qp_params.pu4_qp_to_qscale_map[u1_frame_qp] * + d_bit_consumption_ratio + + 0.5; + u4_frame_qscale = 
CLIP3(ps_layer_state->s_qp_params.pu4_qp_to_qscale_map[0], MAX_SVC_QSCALE, + u4_frame_qscale); + u1_mb_qp = ps_layer_state->s_qp_params.pu1_qscale_to_qp_map[u4_frame_qscale]; + u1_mb_qp = CLIP3(ps_layer_state->s_qp_params.u1_min_qp, + ps_layer_state->s_qp_params.u1_max_qp, u1_mb_qp); + u1_mb_qp = CLIP3(MAX(MIN_H264_QP, ((WORD16) u1_cur_mb_qp) - MAX_MB_QP_DECREMENT), + MIN(MAX_H264_QP, ((WORD16) u1_cur_mb_qp) + MAX_MB_QP_INCREMENT), + ((WORD16) u1_mb_qp)); + /* This ensures mb_qp_delta stays within the interval [-26, 25] */ + u1_mb_qp = CLIP3(MAX(MIN_H264_QP, ((WORD16) u1_frame_qp) - MAX_FRAME_QP_DECREMENT), + MIN(MAX_H264_QP, ((WORD16) u1_frame_qp) + MAX_FRAME_QP_INCREMENT), + ((WORD16) u1_mb_qp)); + } + else + { + u1_mb_qp = u1_cur_mb_qp; + } + + { + vbv_buf_status_e e_vbv_buf_status; + picture_type_e e_rc_pic_type; + + DOUBLE d_est_frame_bits; + + WORD32 i4_num_bits_to_prevent_vbv_underflow; + + d_est_frame_bits = ((DOUBLE) i4_cumulative_mb_bits) * ((DOUBLE) ps_layer_state->i4_num_mbs); + d_est_frame_bits /= u4_num_mbs_sampled; + + switch(ps_sub_pic_rc_variables->s_layer_variables.i4_slice_type) + { + case ISLICE: + { + e_rc_pic_type = I_PIC; + break; + } + case PSLICE: + { + e_rc_pic_type = P_PIC; + break; + } + default: + { + e_rc_pic_type = B_PIC; + break; + } + } + + e_vbv_buf_status = + irc_get_buffer_status(ps_layer_state->pv_layer_rc_ctxt, (WORD32) d_est_frame_bits, + e_rc_pic_type, &i4_num_bits_to_prevent_vbv_underflow); + + /* This models dec VBV buffer */ + if(VBV_OVERFLOW == e_vbv_buf_status) + { + u1_mb_qp--; + } + else if(VBV_UNDERFLOW == e_vbv_buf_status) + { + u1_mb_qp++; + } + + /* This ensures mb_qp_delta stays within the interval [-26, 25] */ + u1_mb_qp = CLIP3(ps_layer_state->s_qp_params.u1_min_qp, + ps_layer_state->s_qp_params.u1_max_qp, u1_mb_qp); + u1_mb_qp = CLIP3(MAX(MIN_H264_QP, ((WORD16) u1_frame_qp) - MAX_FRAME_QP_DECREMENT), + MIN(MAX_H264_QP, ((WORD16) u1_frame_qp) + MAX_FRAME_QP_INCREMENT), + ((WORD16) u1_mb_qp)); + } + + return 
u1_mb_qp; +} + +void isvce_sub_pic_rc_get_entropy_data(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt) +{ +#if DUMP_SUB_PIC_RC_DATA + sub_pic_rc_layer_state_t *ps_layer_state; + + UWORD32 u4_mb_idx; + + svc_sub_pic_rc_constants_t *ps_sub_pic_rc_constants = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_constants; + svc_sub_pic_rc_entropy_variables_t *ps_sub_pic_rc_variables = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_entropy_variables; + sub_pic_rc_state_t *ps_sub_pic_rc_state = + (sub_pic_rc_state_t *) ps_sub_pic_rc_constants->pv_state; + + UWORD8 u1_spatial_layer_id = ps_sub_pic_rc_variables->u1_spatial_layer_id; + + if(!ENABLE_IN_FRAME_RC || (IVE_RC_NONE == ps_sub_pic_rc_state->e_rc_mode)) + { + return; + } + + ps_layer_state = &ps_sub_pic_rc_state->as_sub_pic_rc_layer_states[u1_spatial_layer_id]; + u4_mb_idx = ps_sub_pic_rc_variables->s_mb_pos.i4_abscissa + + ps_sub_pic_rc_variables->s_mb_pos.i4_ordinate * (ps_layer_state->i4_wd / MB_SIZE); + + ps_layer_state->ps_mb_bits_actual[u4_mb_idx] = ps_sub_pic_rc_variables->s_mb_bits; +#else + UNUSED(ps_sub_pic_rc_ctxt); +#endif +} + +void isvce_sub_pic_rc_dump_data(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt) +{ +#if DUMP_SUB_PIC_RC_DATA + WORD32 i, j, k; + + svc_sub_pic_rc_constants_t *ps_sub_pic_rc_constants = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_constants; + sub_pic_rc_state_t *ps_sub_pic_rc_state = + (sub_pic_rc_state_t *) ps_sub_pic_rc_constants->pv_state; + + if(!ENABLE_IN_FRAME_RC || (IVE_RC_NONE == ps_sub_pic_rc_state->e_rc_mode)) + { + return; + } + + for(i = 0; i < ps_sub_pic_rc_state->s_svc_params.u1_num_spatial_layers; i++) + { + sub_pic_rc_layer_state_t *ps_layer_state = + &ps_sub_pic_rc_state->as_sub_pic_rc_layer_states[i]; + + for(j = 0; j < (ps_layer_state->i4_ht / MB_SIZE); j++) + { + for(k = 0; k < (ps_layer_state->i4_wd / MB_SIZE); k++) + { + mb_bits_info_t *ps_mb_bits_est = + &ps_layer_state->ps_mb_bits_info[k + j * (ps_layer_state->i4_wd / MB_SIZE)]; + mb_bits_info_t *ps_mb_bits_actual = + 
&ps_layer_state->ps_mb_bits_actual[k + j * (ps_layer_state->i4_wd / MB_SIZE)]; + + fprintf(ps_layer_state->ps_data_dump_file, "%ld,%ld,%ld,%ld,\n", + ps_mb_bits_est->i8_header_bits, ps_mb_bits_est->i8_texture_bits, + ps_mb_bits_actual->i8_header_bits, ps_mb_bits_actual->i8_texture_bits); + } + } + } +#else + UNUSED(ps_sub_pic_rc_ctxt); +#endif +} + +void isvce_sub_pic_rc_ctxt_delete(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt) +{ + sub_pic_rc_state_t *ps_sub_pic_rc_state = + (sub_pic_rc_state_t *) ps_sub_pic_rc_ctxt->s_sub_pic_rc_constants.pv_state; + + ithread_mutex_destroy(ps_sub_pic_rc_state->pv_bits_accumulator_mutex); + +#if DUMP_SUB_PIC_RC_DATA + { + WORD32 i; + + UWORD8 u1_num_spatial_layers = ps_sub_pic_rc_state->s_svc_params.u1_num_spatial_layers; + + for(i = u1_num_spatial_layers - 1; i >= 0; i--) + { + sub_pic_rc_layer_state_t *ps_layer_state = + &ps_sub_pic_rc_state->as_sub_pic_rc_layer_states[i]; + + if(ps_layer_state->ps_data_dump_file) + { + fclose(ps_layer_state->ps_data_dump_file); + } + + ps_layer_state->ps_data_dump_file = NULL; + } + } +#endif +} diff --git a/encoder/svc/isvce_sub_pic_rc.h b/encoder/svc/isvce_sub_pic_rc.h new file mode 100644 index 0000000..f51ded9 --- /dev/null +++ b/encoder/svc/isvce_sub_pic_rc.h @@ -0,0 +1,131 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_sub_pic_rc.h +* +* @brief +* Contains typdefs and externs used for invoking sub-pic RC +* +******************************************************************************* +*/ + +#ifndef _ISVCE_SUB_PIC_RC_H_ +#define _ISVCE_SUB_PIC_RC_H_ + +#include "ih264_typedefs.h" +#include "isvce_pred_structs.h" +#include "isvce_defs.h" + +/* Structs */ +typedef struct svc_sub_pic_rc_constants_t +{ + void *pv_state; + +} svc_sub_pic_rc_constants_t; + +typedef struct mb_bits_info_t +{ + WORD64 i8_header_bits; + + WORD64 i8_texture_bits; +} mb_bits_info_t; + +typedef struct svc_sub_pic_rc_entropy_variables_t +{ + coordinates_t s_mb_pos; + + mb_bits_info_t s_mb_bits; + + UWORD8 u1_spatial_layer_id; +} svc_sub_pic_rc_entropy_variables_t; + +typedef struct svc_sub_pic_rc_layer_variables_t +{ + WORD32 i4_max_num_reference_frames; + + WORD32 i4_slice_type; + + WORD32 i4_frame_num; + + UWORD8 u1_frame_qp; + + UWORD8 u1_min_qp; + + UWORD8 u1_max_qp; + + UWORD8 u1_spatial_layer_id; +} svc_sub_pic_rc_layer_variables_t; + +typedef struct svc_sub_pic_rc_mb_variables_t +{ + buffer_container_t as_quant_coeffs[NUM_SP_COMPONENTS]; + + isvce_enc_pu_mv_t *aps_mvps[MAX_MVP_IDX + 1]; + + coordinates_t s_mb_pos; + + isvce_mb_info_t *ps_mb_info; + + UWORD8 *apu1_nnzs[NUM_SP_COMPONENTS]; + + UWORD32 u4_cbp; +} svc_sub_pic_rc_mb_variables_t; + +typedef struct svc_sub_pic_rc_variables_t +{ + svc_sub_pic_rc_layer_variables_t s_layer_variables; + + svc_sub_pic_rc_mb_variables_t s_mb_variables; + +} svc_sub_pic_rc_variables_t; + +typedef struct svc_sub_pic_rc_ctxt_t +{ + svc_sub_pic_rc_constants_t s_sub_pic_rc_constants; + + svc_sub_pic_rc_variables_t s_sub_pic_rc_variables; + + svc_sub_pic_rc_entropy_variables_t 
s_sub_pic_rc_entropy_variables; +} svc_sub_pic_rc_ctxt_t; + +/* Function declarations */ +extern UWORD32 isvce_get_sub_pic_rc_ctxt_size(UWORD8 u1_num_spatial_layers, + DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, + UWORD32 u4_ht); + +extern void isvce_sub_pic_rc_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec); + +extern void isvce_sub_pic_rc_ctxt_layer_init(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt); + +extern void isvce_sub_pic_rc_ctxt_delete(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt); + +extern void isvce_sub_pic_rc_ctxt_update(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt); + +extern UWORD8 isvce_sub_pic_rc_get_mb_qp(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt, + UWORD8 u1_cur_mb_qp); + +extern void isvce_sub_pic_rc_get_entropy_data(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt); + +extern void isvce_sub_pic_rc_dump_data(svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt); + +#endif diff --git a/encoder/svc/isvce_sub_pic_rc_private_defs.h b/encoder/svc/isvce_sub_pic_rc_private_defs.h new file mode 100644 index 0000000..14f77ba --- /dev/null +++ b/encoder/svc/isvce_sub_pic_rc_private_defs.h @@ -0,0 +1,256 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_sub_pic_rc_private_defs.h +* +* @brief +* Contains typdefs and externs used exclusively by sub-pic RC functions +* +******************************************************************************* +*/ + +#ifndef _ISVCE_SUB_PIC_RC_PRIVATE_DEFS_H_ +#define _ISVCE_SUB_PIC_RC_PRIVATE_DEFS_H_ + +#include + +#include "ih264_typedefs.h" +#include "ive2.h" +#include "isvc_defs.h" +#include "ithread.h" +#include "isvce_structs.h" +#include "isvce_sub_pic_rc.h" + +/* Macros */ +#define DUMP_SUB_PIC_RC_DATA 0 + +#if DUMP_SUB_PIC_RC_DATA +#ifdef WINDOWS +#define SUB_PIC_RC_DUMP_FILE_PATH "D:\\H264\\" +#else +#define SUB_PIC_RC_DUMP_FILE_PATH "/mnt/d/H264/" +#endif + +#define MAX_SUB_PIC_RC_DUMP_FILE_PATH_LENGTH 100 +#endif + +#define MAX_UEV_CODEWORD (1 << 6) + +#define SVC_QSCALE_Q_FACTOR 3 + +/* (2 ^ ((MAX_H264_QP - 4) / 6)) * (2 ^ (SVC_QSCALE_Q_FACTOR)) */ +#define MAX_SVC_QSCALE 1824 + +#define BIT_RATIO_FOR_OVERCONSUMPTION 1.2 + +#define BIT_RATIO_FOR_UNDERCONSUMPTION 0.7 + +#define MIN_SAMPLED_MB_RATIO 0.05 + +#define MAX_MB_QP_DECREMENT 1 + +#define MAX_MB_QP_INCREMENT 1 + +#define MAX_FRAME_QP_DECREMENT 12 + +#define MAX_FRAME_QP_INCREMENT 13 + +#define ENABLE_IN_FRAME_RC 1 + +/* Globals */ +static const UWORD8 gau1_uev_codeword_to_bits_map[MAX_UEV_CODEWORD + 1] = { + 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 13, 13, +}; + +static const UWORD32 gau4_tu_zscan_id_to_rasterscan_id_map[MIN_TU_IN_MB][NUM_4x4_IN_8x8] = { + { + 0, + 1, + 4, + 5, + }, + { + 2, + 3, + 6, + 7, + }, + { + 8, + 9, + 12, + 13, + }, + { + 10, + 11, + 14, + 15, + }, +}; + +static const UWORD32 gau4_qp_to_qscale_map[MAX_H264_QP + 1] = { + 5, 5, 6, 7, 8, 8, 10, 11, 12, 14, 16, 17, 20, 22, 25, 28, 32, 
35, + 40, 45, 50, 57, 64, 71, 80, 90, 101, 114, 128, 143, 161, 181, 203, 228, 256, 287, + 322, 362, 406, 456, 512, 574, 645, 724, 812, 912, 1024, 1149, 1290, 1448, 1625, 1824, +}; + +static const UWORD8 gau1_qscale_to_qp_map[MAX_SVC_QSCALE + 1] = { + 0, 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 11, 11, 11, 12, 12, 13, 13, + 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 17, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 19, + 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39, + 
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 
49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, +}; + +/* Structs */ +typedef struct sub_pic_rc_qp_params_t +{ + const UWORD32 *pu4_qp_to_qscale_map; + + const UWORD8 *pu1_qscale_to_qp_map; + + UWORD8 u1_min_qp; + + UWORD8 u1_max_qp; + +} sub_pic_rc_qp_params_t; + +typedef struct sub_pic_rc_layer_state_t +{ + void *pv_layer_rc_ctxt; + + 
sub_pic_rc_qp_params_t s_qp_params; + + /* Array of size NumMB's */ + mb_bits_info_t *ps_mb_bits_info; + + mb_bits_info_t s_cumulative_mb_bits; + + UWORD32 u4_allocated_bits; + + UWORD32 u4_num_mbs_sampled; + + WORD32 i4_wd; + + WORD32 i4_ht; + + WORD32 i4_num_mbs; + +#if DUMP_SUB_PIC_RC_DATA + FILE *ps_data_dump_file; + + mb_bits_info_t *ps_mb_bits_actual; +#endif +} sub_pic_rc_layer_state_t; + +typedef struct sub_pic_rc_state_t +{ + sub_pic_rc_layer_state_t as_sub_pic_rc_layer_states[MAX_NUM_SPATIAL_LAYERS]; + + svc_params_t s_svc_params; + + void *pv_bits_accumulator_mutex; + + const UWORD8 *pu1_sev_codeword_to_bits_map; + + const UWORD8 *pu1_uev_codeword_to_bits_map; + + IVE_RC_MODE_T e_rc_mode; +} sub_pic_rc_state_t; + +#endif diff --git a/encoder/svc/isvce_utils.c b/encoder/svc/isvce_utils.c new file mode 100644 index 0000000..eb40181 --- /dev/null +++ b/encoder/svc/isvce_utils.c @@ -0,0 +1,4542 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
 */

/**
*******************************************************************************
* @file
*  isvce_utils.c
*
* @brief
*  Contains utility functions used for SVC encoding
*
* @author
*  ittiam
*
* @par List of Functions:
*  - isvce_ref_list_refresh()
*  - isvce_svc_inp_params_validate()
*
* @remarks
*  None
*
*******************************************************************************
*/
/*
 * NOTE(review): the names of the two system headers below were lost when this
 * text was extracted (only bare '#include' directives remained). <math.h> is
 * required by the pow() calls in this file; <limits.h> is an assumption and
 * should be confirmed against the original source.
 */
#include <math.h>
#include <limits.h>

#include "ih264_typedefs.h"

/* Dependencies of ih264_buf_mgr.h */
/* Dependencies of ih264_list.h */
#include "ih264_error.h"

#include "ih264_buf_mgr.h"
#include "ih264_list.h"
#include "ih264_trans_data.h"
#include "ih264_size_defs.h"

/* Dependencies of ih264_common_tables.h */
#include "ih264_defs.h"
#include "ih264_structs.h"

#include "ih264_common_tables.h"

/* Dependencies of ih264e_bitstream.h */
#include "ih264e_error.h"

/* Dependencies of ih264e_cabac_structs.h */
#include "ih264_cabac_tables.h"

/* Dependencies of ime_structs.h */
#include "ime_defs.h"
#include "ime_distortion_metrics.h"

/* Dependencies of ih264e_structs.h */
#include "iv2.h"
#include "ive2.h"
#include "ih264_defs.h"
#include "ih264_deblk_edge_filters.h"
#include "ih264_inter_pred_filters.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
#include "ih264e_bitstream.h"
#include "ih264e_cabac_structs.h"
#include "ime_statistics.h"
#include "ime_structs.h"
/* Dependencies of 'irc_picture_type.h' */
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "irc_mem_req_and_acq.h"
/* Dependencies of 'irc_rate_control_api_structs' */
#include "irc_picture_type.h"
#include "irc_rd_model.h"
#include "irc_vbr_storage_vbv.h"
#include "irc_est_sad.h"
#include "irc_bit_allocation.h"
#include "irc_mb_model_based.h"
#include "irc_cbr_buffer_control.h"
#include "irc_vbr_str_prms.h"
#include "irc_common.h"
#include "irc_rate_control_api_structs.h"
+#include "irc_rate_control_api.h" +#include "irc_svc_rate_control_api.h" +/* Dependencies of 'ih264e_utils.h' */ +#include "ih264e_defs.h" +#include "ih264e_structs.h" +/* Dependencies of 'ih264e_utils.h' */ +#include "ih264e_rc_mem_interface.h" +#include "ih264e_time_stamp.h" +#include "ih264e_utils.h" +#include "ime.h" +#include "isvc_macros.h" +#include "isvce_cabac.h" +#include "isvce_core_coding.h" +#include "isvce_defs.h" +#include "isvce_error.h" +#include "isvce_me.h" +#include "isvce_utils.h" +#include "isvce_downscaler.h" +#include "isvce_encode_header.h" +#include "isvce_rate_control.h" +#include "isvce_sub_pic_rc.h" + +static const UWORD32 gu4_downscaler_blk_size = 96; + +static FORCEINLINE UWORD32 isvce_get_downscaler_blk_dims(UWORD32 u4_frame_dim, UWORD32 u4_blk_pos, + UWORD32 u4_default_blk_size) +{ + return ((u4_frame_dim - u4_blk_pos * u4_default_blk_size) < u4_default_blk_size) + ? (u4_frame_dim - u4_blk_pos * u4_default_blk_size) + : u4_default_blk_size; +} + +/** +******************************************************************************* +* +* @brief +* Reference and MV bank Buffer Manager for SVC +* +* @par Description: +* Here we will +* 1) Find the correct ref pics for the current frame +* 2) Free the ref pics that are not going to be used anymore +* +* 1) Finding correct ref pic +* All pics needed for future are arranged in a picture list called +* ps_codec->as_ref_set. Each picture in this will have a pic buffer and +* MV buffer that is marked appropriately as BUF_MGR_REF, BUF_MGR_IO or +* BUF_MGR_CODEC. pic_cnt, poc, and temporal_id will also be present. +* The strategy is to pick the closest references that belongs to the +* same temporal_id or lesser. The closeness is measured via the +* smallest absolute difference between ref and cur pocs. +* +* Note that i4_pic_cnt == -1 is used to filter uninitialised ref pics. +* Now since we only have max two ref pics, we will always find max 2 +* ref pics. 
+* +* 2) Self explanatory +* +* @param[in] ps_codec +* Pointer to codeec context +* +* @param[in] pps_ref_pics +* Array of pointers to refPicBufs +* +* @param[in] pps_mv_bufs +* Array of pointers to refMVBufs +* +* @param[in] e_pic_type +* Picture type +* +* @returns Nothing +* +******************************************************************************* +*/ +static WORD32 isvce_ref_list_refresh(isvce_codec_t *ps_codec, svc_au_buf_t **pps_ref_pics, + svc_au_data_t **pps_mv_bufs, WORD32 *pi4_ref_set_id, + PIC_TYPE_T e_pic_type) +{ + typedef struct + { + WORD32 i4_buf_id; + + WORD32 i4_abs_poc_diff; + + WORD8 i1_temporal_id; + } ref_pic_props_t; + + ref_pic_props_t s_ref_pic_props = {0, 0, -1}; + + WORD32 i, buf_status; + + WORD32 i4_cur_pic_poc = ps_codec->i4_poc; + WORD32 i4_cur_pic_temporal_id = isvce_svc_temporal_id_compute( + ps_codec->i4_poc, ps_codec->s_cfg.s_svc_params.u1_num_temporal_layers, e_pic_type); + + if(e_pic_type == PIC_B) + { + return IH264E_FAIL; + } + + ASSERT(1 == MAX_LAYER_REFERENCE_PICS); + + /* Pick a ref_pic for the current picture */ + if(e_pic_type != PIC_IDR) + { + for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + { + WORD32 i4_abs_poc_diff; + WORD8 i1_temporal_id; + + if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) + { + continue; + } + + buf_status = ih264_buf_mgr_get_status(ps_codec->pv_ref_buf_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id); + + if(buf_status & BUF_MGR_REF) + { + i4_abs_poc_diff = ABS(ps_codec->as_ref_set[i].i4_poc - i4_cur_pic_poc); + i1_temporal_id = ps_codec->as_ref_set[i].ps_pic_buf->i1_temporal_id; + + if(s_ref_pic_props.i1_temporal_id > -1) + { + if((i1_temporal_id <= i4_cur_pic_temporal_id) && + (s_ref_pic_props.i4_abs_poc_diff > i4_abs_poc_diff)) + { + s_ref_pic_props.i4_abs_poc_diff = i4_abs_poc_diff; + s_ref_pic_props.i1_temporal_id = i1_temporal_id; + s_ref_pic_props.i4_buf_id = i; + } + } + else if(i1_temporal_id <= i4_cur_pic_temporal_id) + { + s_ref_pic_props.i4_abs_poc_diff = i4_abs_poc_diff; + 
s_ref_pic_props.i1_temporal_id = i1_temporal_id; + s_ref_pic_props.i4_buf_id = i; + } + } + } + + if(s_ref_pic_props.i1_temporal_id < 0) + { + return IH264E_FAIL; + } + + pps_ref_pics[0] = pps_ref_pics[1] = + ps_codec->as_ref_set[s_ref_pic_props.i4_buf_id].ps_pic_buf; + pps_mv_bufs[0] = pps_mv_bufs[1] = + ps_codec->as_ref_set[s_ref_pic_props.i4_buf_id].ps_svc_au_data; + + /* Pick all ref pic_bufs to be freed. */ + for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + { + if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) + { + continue; + } + + buf_status = ih264_buf_mgr_get_status(ps_codec->pv_ref_buf_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id); + + if((buf_status & (BUF_MGR_REF | BUF_MGR_CODEC | BUF_MGR_IO)) == 0) + { + ps_codec->as_ref_set[i].i4_pic_cnt = -1; + ps_codec->as_ref_set[i].i4_poc = 32768; + + continue; + } + + if(buf_status & BUF_MGR_REF) + { + if((i4_cur_pic_temporal_id <= ps_codec->as_ref_set[i].ps_pic_buf->i1_temporal_id) && + (pps_ref_pics[0]->i4_frame_num != + ps_codec->as_ref_set[i].ps_pic_buf->i4_frame_num) && + (pps_ref_pics[0]->i4_frame_num != + ps_codec->as_ref_set[i].ps_pic_buf->i4_frame_num)) + { + ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id, + BUF_MGR_REF); + + ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id, + BUF_MGR_REF); + } + } + } + } + else + { + for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + { + if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) + { + continue; + } + + buf_status = ih264_buf_mgr_get_status(ps_codec->pv_ref_buf_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id); + + if((buf_status & (BUF_MGR_REF | BUF_MGR_CODEC | BUF_MGR_IO)) == 0) + { + ps_codec->as_ref_set[i].i4_pic_cnt = -1; + ps_codec->as_ref_set[i].i4_poc = 32768; + + continue; + } + + if(buf_status & BUF_MGR_REF) + { + ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id, BUF_MGR_REF); + + 
ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id, BUF_MGR_REF); + } + } + } + + /* + * Mark all reference pic with unused buffers to be free + * We need this step since each one, ie ref, recon io etc only unset their + * respective flags. Hence we need to combine togather and mark the ref set + * accordingly + */ + pi4_ref_set_id[0] = -1; + + for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + { + if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) + { + pi4_ref_set_id[0] = i; + continue; + } + + buf_status = ih264_buf_mgr_get_status(ps_codec->pv_ref_buf_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id); + + if((buf_status & (BUF_MGR_REF | BUF_MGR_CODEC | BUF_MGR_IO)) == 0) + { + ps_codec->as_ref_set[i].i4_pic_cnt = -1; + ps_codec->as_ref_set[i].i4_poc = 32768; + + pi4_ref_set_id[0] = i; + } + } + + /* An asssert failure here means we donot have any free buffs */ + if(pi4_ref_set_id[0] < 0) + { + return IH264E_FAIL; + } + + return IH264E_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Validates SVC AU properties +* +* @param[in] ps_cfg +* Cfg parameters +* +* @returns error code in conformance with 'IH264E_ERROR_T' +* +******************************************************************************* +*/ +WORD32 isvce_svc_au_props_validate(svc_inp_params_t *ps_svc_inp_params, UWORD32 u4_inp_wd, + UWORD32 u4_inp_ht, UWORD32 u4_svc_comp_wd, + UWORD32 u4_svc_comp_ht) +{ + typedef struct + { + DOUBLE d_spatial_res_ratio; + + UWORD8 u1_max_num_spatial_layers; + } spatial_layer_props_t; + + UWORD8 i; + UWORD32 au4_svc_wd[MAX_NUM_SPATIAL_LAYERS]; + UWORD32 au4_svc_ht[MAX_NUM_SPATIAL_LAYERS]; + + DOUBLE d_scaling_factor = ps_svc_inp_params->d_spatial_res_ratio; + UWORD8 u1_num_spatial_layers = ps_svc_inp_params->u1_num_spatial_layers; + const spatial_layer_props_t gas_valid_spatial_layer_props[] = {{1.5, 2}, {2, 3}}; + UWORD32 u4_error_code = IV_SUCCESS; + const UWORD8 
u1_min_num_temporal_layers = 1; + const UWORD8 u1_min_num_spatial_layers = 1; + const UWORD8 u1_max_num_temporal_layers = MAX_NUM_TEMPORAL_LAYERS; + const UWORD8 u1_max_num_spatial_layers = MAX_NUM_SPATIAL_LAYERS; + const UWORD8 u1_num_valid_spatial_layer_props = + sizeof(gas_valid_spatial_layer_props) / sizeof(gas_valid_spatial_layer_props[0]); + + if((ps_svc_inp_params->u1_num_temporal_layers < u1_min_num_temporal_layers) || + (ps_svc_inp_params->u1_num_temporal_layers > u1_max_num_temporal_layers)) + { + u4_error_code |= IH264E_INVALID_SVC_PARAMS | IH264E_INVALID_NUM_TEMPORAL_LAYERS; + } + + if((ps_svc_inp_params->u1_num_spatial_layers < u1_min_num_spatial_layers) || + (ps_svc_inp_params->u1_num_spatial_layers > u1_max_num_spatial_layers)) + { + u4_error_code |= IH264E_INVALID_SVC_PARAMS | IH264E_INVALID_NUM_SPATIAL_LAYERS; + } + + { + UWORD8 u1_is_input_ratio_valid = 0; + + for(i = 0; i < u1_num_valid_spatial_layer_props; i++) + { + if(ps_svc_inp_params->d_spatial_res_ratio == + gas_valid_spatial_layer_props[i].d_spatial_res_ratio) + { + u1_is_input_ratio_valid = 1; + + if(ps_svc_inp_params->u1_num_spatial_layers > + gas_valid_spatial_layer_props[i].u1_max_num_spatial_layers) + { + u4_error_code |= IH264E_INVALID_SVC_PARAMS | IH264E_INVALID_NUM_SPATIAL_LAYERS; + } + + break; + } + } + + if(!u1_is_input_ratio_valid) + { + u4_error_code |= IH264E_INVALID_SVC_PARAMS | IH264E_INVALID_SPATIAL_RES_RATIO; + } + } + + if((u4_svc_comp_wd > SVCE_MAX_INP_DIM) || (u4_svc_comp_ht > SVCE_MAX_INP_DIM) || + ((u4_svc_comp_wd * u4_svc_comp_ht) > SVCE_MAX_INP_FRAME_SIZE) || + (u4_svc_comp_wd % 16 != 0) || (u4_svc_comp_ht % 16 != 0)) + { + u4_error_code |= IH264E_INVALID_SVC_INPUT_DIMENSIONS; + } + + /* Constraint from padding intrinsics */ + if((u4_svc_comp_wd - u4_inp_wd) % 16) + { + u4_error_code |= IH264E_INVALID_SVC_INPUT_DIMENSIONS; + } + + /* Constraint from 420p to 420sp conversion */ + if((u4_svc_comp_ht - u4_inp_ht) % 4) + { + u4_error_code |= 
IH264E_INVALID_SVC_INPUT_DIMENSIONS; + } + + au4_svc_wd[u1_num_spatial_layers - 1] = u4_svc_comp_wd; + au4_svc_ht[u1_num_spatial_layers - 1] = u4_svc_comp_ht; + + for(i = (u1_num_spatial_layers - 1); i > 0; i--) + { + au4_svc_wd[i - 1] = au4_svc_wd[i] / d_scaling_factor; + au4_svc_ht[i - 1] = au4_svc_ht[i] / d_scaling_factor; + + if((au4_svc_wd[i - 1] * d_scaling_factor != au4_svc_wd[i]) || + (au4_svc_ht[i - 1] * d_scaling_factor != au4_svc_ht[i]) || + (au4_svc_ht[i - 1] % 16 != 0) || (au4_svc_ht[i - 1] % 16 != 0)) + { + u4_error_code |= IH264E_INVALID_SVC_INPUT_DIMENSIONS; + } + } + + return u4_error_code; +} + +/** +******************************************************************************* +* +* @brief +* Validates SVC input params +* +* @param[in] ps_cfg +* Cfg parameters +* +* @returns error code in conformance with 'IH264E_ERROR_T' +* +******************************************************************************* +*/ +WORD32 isvce_svc_inp_params_validate(isvce_init_ip_t *ps_ip, isvce_cfg_params_t *ps_cfg) +{ + UWORD32 u4_error_code = isvce_svc_au_props_validate(&ps_ip->s_svc_inp_params, ps_ip->u4_wd, + ps_ip->u4_ht, ps_cfg->u4_wd, ps_cfg->u4_ht); + + if(ps_cfg->u4_enable_alt_ref) + { + u4_error_code |= IH264E_INVALID_ALT_REF_OPTION; + } + + if(ps_cfg->u4_num_bframes) + { + u4_error_code |= IH264E_BFRAMES_NOT_SUPPORTED; + } + + if(ps_cfg->e_slice_mode != IVE_SLICE_MODE_NONE) + { + u4_error_code |= IH264E_SLICE_TYPE_INPUT_INVALID; + } + + if(ps_cfg->e_content_type != IV_PROGRESSIVE) + { + u4_error_code |= IH264E_CONTENT_TYPE_NOT_SUPPORTED; + } + + if(ps_cfg->u4_weighted_prediction) + { + u4_error_code |= IH264E_WEIGHTED_PRED_NOT_SUPPORTED; + } + + return u4_error_code; +} + +/** +******************************************************************************* +* +* @brief +* Used to get reference picture buffer size for a given level and +* and padding used +* +* @param[in] ps_svc_inp_params +* Struct containing SVC specific input params +* +* @param[in] 
i4_pic_size +* Number of luma samples (Width * Height) +* +* @param[in] i4_level +* Level +* +* @param[in] i4_horz_pad +* Total padding used in horizontal direction +* +* @param[in] i4_vert_pad +* Total padding used in vertical direction +* +* @param[in] i4_num_ref_frames +* Num Reference Frames +* +* @param[in] i4_num_reorder_frames +* Num Reorder Frames +* +* @returns Total picture buffer size +* +******************************************************************************* +*/ +WORD32 isvce_get_total_svc_au_buf_size(svc_inp_params_t *ps_svc_inp_params, WORD32 i4_pic_size, + WORD32 i4_level, WORD32 i4_horz_pad, WORD32 i4_vert_pad, + WORD32 i4_num_ref_frames, WORD32 i4_num_reorder_frames) +{ + WORD32 i; + WORD32 size; + WORD32 num_luma_samples; + WORD32 lvl_idx; + WORD32 max_wd, min_ht; + WORD32 num_samples; + WORD32 max_num_bufs; + + WORD32 pad = MAX(i4_horz_pad, i4_vert_pad); + DOUBLE d_svc_size_multiplier = 1; + + for(i = 1; i < ps_svc_inp_params->u1_num_spatial_layers; i++) + { + d_svc_size_multiplier += 1. 
/ pow(ps_svc_inp_params->d_spatial_res_ratio, i); + } + + /* + * If i4_num_ref_frames and num_reorder_frmaes is specified + * Use minimum value + */ + max_num_bufs = (i4_num_ref_frames + i4_num_reorder_frames + MAX_CTXT_SETS + + ps_svc_inp_params->u1_num_temporal_layers); + + /* Get i4_level index */ + lvl_idx = ih264e_get_lvl_idx(i4_level); + + /* Maximum number of luma samples in a picture at given i4_level */ + num_luma_samples = gai4_ih264_max_luma_pic_size[lvl_idx]; + num_luma_samples = MAX(num_luma_samples, i4_pic_size); + + /* Account for chroma */ + num_samples = num_luma_samples * 3 / 2; + + /* Maximum width of luma samples in a picture at given i4_level */ + max_wd = gai4_ih264_max_wd_ht[lvl_idx]; + + /* Minimum height of luma samples in a picture at given i4_level */ + min_ht = gai4_ih264_min_wd_ht[lvl_idx]; + + /* Allocation is required for + * (Wd + i4_horz_pad) * (Ht + i4_vert_pad) * (2 * max_dpb_size + 1) + * + * Above expanded as + * ((Wd * Ht) + (i4_horz_pad * i4_vert_pad) + Wd * i4_vert_pad + Ht * + * i4_horz_pad) * (2 * max_dpb_size + 1) (Wd * Ht) * (2 * max_dpb_size + 1) + + * ((i4_horz_pad * i4_vert_pad) + Wd * i4_vert_pad + Ht * i4_horz_pad) * (2 * + * max_dpb_size + 1) Now max_dpb_size increases with smaller Wd and Ht, but Wd + * * ht * max_dpb_size will still be lesser or equal to max_wd * max_ht * + * dpb_size + * + * In the above equation (Wd * Ht) * (2 * max_dpb_size + 1) is accounted by + * using num_samples * (2 * max_dpb_size + 1) below + * + * For the padded area use MAX(i4_horz_pad, i4_vert_pad) as pad + * ((pad * pad) + pad * (Wd + Ht)) * (2 * max_dpb_size + 1) has to accounted + * from the above for padding + * + * Since Width and Height can change worst Wd + Ht is when One of the + * dimensions is max and other is min So use max_wd and min_ht + */ + + /* Number of bytes in reference pictures */ + size = num_samples * max_num_bufs; + + /* Account for Spatial Layers */ + size = (WORD32) (size * d_svc_size_multiplier + 0.99); + + /* 
Account for padding area */ + size += ((pad * pad) + pad * (max_wd + min_ht)) * 3 / 2 * max_num_bufs * + ps_svc_inp_params->u1_num_spatial_layers; + + size += ps_svc_inp_params->u1_num_spatial_layers * sizeof(yuv_buf_props_t); + + return size; +} + +/** +******************************************************************************* +* +* @brief +* Used to get size of buffers used for storing prediction data +* +* @param[in] ps_svc_inp_params +* Struct containing SVC specific input params +* +* @param[in] i4_num_luma_samples +* Number of luma samples (Width * Height) +* +* @returns Size of buffers used for storing prediction data +* +******************************************************************************* +*/ +UWORD32 isvce_get_total_svc_au_data_size(WORD32 i4_num_luma_samples, UWORD8 u1_num_spatial_layers, + DOUBLE d_spatial_res_ratio) +{ + WORD32 i; + + UWORD32 u4_svc_au_data_size = 0; + + u4_svc_au_data_size += u1_num_spatial_layers * sizeof(svc_layer_data_t); + + for(i = 0; i < u1_num_spatial_layers; i++) + { + WORD32 i4_layer_luma_samples = + ((DOUBLE) i4_num_luma_samples) / pow(pow(d_spatial_res_ratio, i), 2) + 0.99; + WORD32 i4_num_mbs = i4_layer_luma_samples / (MB_SIZE * MB_SIZE); + + /* isvce_mb_info_t */ + u4_svc_au_data_size += i4_num_mbs * sizeof(isvce_mb_info_t); + + /* pu4_num_pus_in_mb */ + u4_svc_au_data_size += i4_num_mbs * sizeof(UWORD32); + } + + return u4_svc_au_data_size; +} + +/** +******************************************************************************* +* +* @brief Function to add buffers to SVC AU Data Store Manager +* +* @param[in] ps_codec +* Pointer to codec context +* +* @returns error status +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_svc_au_data_mgr_add_bufs(isvce_codec_t *ps_codec) +{ + IH264_ERROR_T ret; + + WORD32 i, j; + UWORD8 *pu1_buf; + + svc_au_data_t *ps_svc_au_data = ps_codec->ps_svc_au_data_base; + + WORD32 i4_max_dpb_size = 
ps_codec->i4_ref_buf_cnt; + WORD64 i8_alloc_mem_size = ps_codec->i4_svc_au_data_size; + WORD32 i4_num_luma_samples = ALIGN16(ps_codec->s_cfg.u4_wd) * ALIGN16(ps_codec->s_cfg.u4_ht); + UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; + DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio; + + ps_codec->ps_svc_au_data = ps_svc_au_data; + pu1_buf = (UWORD8 *) ps_svc_au_data; + pu1_buf += BUF_MGR_MAX_CNT * sizeof(ps_svc_au_data[0]); + + i8_alloc_mem_size -= (BUF_MGR_MAX_CNT * sizeof(ps_svc_au_data[0])); + + i = 0; + + while(i < i4_max_dpb_size) + { + ps_svc_au_data->ps_svc_layer_data = (svc_layer_data_t *) pu1_buf; + pu1_buf += u1_num_spatial_layers * sizeof(ps_svc_au_data->ps_svc_layer_data[0]); + i8_alloc_mem_size -= u1_num_spatial_layers * sizeof(ps_svc_au_data->ps_svc_layer_data[0]); + + for(j = u1_num_spatial_layers - 1; j >= 0; j--) + { + WORD32 i4_layer_luma_samples = + ((DOUBLE) i4_num_luma_samples) / + pow(pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j), 2) + + 0.99; + WORD32 i4_num_mbs = i4_layer_luma_samples / (MB_SIZE * MB_SIZE); + + ps_svc_au_data->ps_svc_layer_data[j].pu4_num_pus_in_mb = (UWORD32 *) pu1_buf; + pu1_buf += + i4_num_mbs * sizeof(ps_svc_au_data->ps_svc_layer_data[j].pu4_num_pus_in_mb[0]); + i8_alloc_mem_size -= + i4_num_mbs * sizeof(ps_svc_au_data->ps_svc_layer_data[j].pu4_num_pus_in_mb[0]); + + ps_svc_au_data->ps_svc_layer_data[j].ps_mb_info = (isvce_mb_info_t *) pu1_buf; + pu1_buf += i4_num_mbs * sizeof(ps_svc_au_data->ps_svc_layer_data[j].ps_mb_info[0]); + i8_alloc_mem_size -= + i4_num_mbs * sizeof(ps_svc_au_data->ps_svc_layer_data[j].ps_mb_info[0]); + + ASSERT(i8_alloc_mem_size >= 0); + } + + if(i8_alloc_mem_size < 0) + { + ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_MVBANK; + + return IH264E_INSUFFICIENT_MEM_MVBANK; + } + + ret = + ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_svc_au_data_store_mgr, ps_svc_au_data, i); + + if(IH264_SUCCESS != ret) + { + 
ps_codec->i4_error_code = IH264E_BUF_MGR_ERROR; + + return IH264E_BUF_MGR_ERROR; + } + + ps_svc_au_data++; + i++; + } + + return IH264E_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Function to initialize svc_au_buf_t structs add au buffers to +* buffer manager in case of non-shared mode +* +* @param[in] ps_codec +* Pointer to codec context +* +* @returns error status +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_svc_au_buf_mgr_add_bufs(isvce_codec_t *ps_codec) +{ + WORD32 i, j; + WORD32 buf_ret; + + svc_au_buf_t *ps_pic_buf = ps_codec->ps_pic_buf; + + IH264E_ERROR_T ret = IH264E_SUCCESS; + + WORD32 i4_max_dpb_size = ps_codec->i4_ref_buf_cnt; + WORD64 i8_alloc_mem_size = + ps_codec->i4_total_pic_buf_size - BUF_MGR_MAX_CNT * sizeof(ps_pic_buf[0]); + UWORD8 *pu1_buf = (UWORD8 *) ps_codec->ps_pic_buf; + UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; + DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio; + + pu1_buf += BUF_MGR_MAX_CNT * sizeof(svc_au_buf_t); + + for(i = 0; i < i4_max_dpb_size; i++) + { + WORD32 i4_total_fpel_mem_size = 0; + + ps_pic_buf->ps_layer_yuv_buf_props = (yuv_buf_props_t *) pu1_buf; + pu1_buf += u1_num_spatial_layers * sizeof(ps_pic_buf->ps_layer_yuv_buf_props[0]); + i8_alloc_mem_size -= u1_num_spatial_layers * sizeof(ps_pic_buf->ps_layer_yuv_buf_props[0]); + + if(i8_alloc_mem_size < 0) + { + ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_PICBUF; + return IH264E_INSUFFICIENT_MEM_PICBUF; + } + + for(j = u1_num_spatial_layers - 1; j >= 0; j--) + { + WORD32 i4_layer_luma_wd = ((DOUBLE) ps_codec->s_cfg.u4_wd / + pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + + 0.99; + WORD32 i4_layer_luma_ht = ((DOUBLE) ps_codec->s_cfg.u4_ht / + pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + + 0.99; + WORD32 i4_layer_luma_samples = + 
(ALIGN16(i4_layer_luma_wd) + PAD_WD) * (i4_layer_luma_ht + PAD_HT); + WORD32 i4_layer_uv_wd = i4_layer_luma_wd; + WORD32 i4_layer_uv_ht = i4_layer_luma_ht / 2.0 + 0.99; + WORD32 i4_layer_uv_samples = + (ALIGN16(i4_layer_uv_wd) + PAD_WD) * (i4_layer_uv_ht + PAD_HT); + + ps_pic_buf->ps_layer_yuv_buf_props[j].as_component_bufs[0].i4_data_stride = + ALIGN16(i4_layer_luma_wd) + PAD_WD; + ps_pic_buf->ps_layer_yuv_buf_props[j].as_component_bufs[0].pv_data = + pu1_buf + + ps_pic_buf->ps_layer_yuv_buf_props[j].as_component_bufs[0].i4_data_stride * + PAD_TOP + + PAD_LEFT; + + pu1_buf += i4_layer_luma_samples; + + ps_pic_buf->ps_layer_yuv_buf_props[j].as_component_bufs[1].i4_data_stride = + ALIGN16(i4_layer_uv_wd) + PAD_WD; + ps_pic_buf->ps_layer_yuv_buf_props[j].as_component_bufs[1].pv_data = + pu1_buf + + ps_pic_buf->ps_layer_yuv_buf_props[j].as_component_bufs[1].i4_data_stride * + (PAD_TOP / 2) + + PAD_LEFT; + + pu1_buf += i4_layer_uv_samples; + + ps_pic_buf->ps_layer_yuv_buf_props[j].u4_width = i4_layer_luma_wd; + ps_pic_buf->ps_layer_yuv_buf_props[j].u4_height = i4_layer_luma_ht; + ps_pic_buf->ps_layer_yuv_buf_props[j].u1_bit_depth = 8; + ps_pic_buf->ps_layer_yuv_buf_props[j].e_color_format = IV_YUV_420SP_UV; + + i8_alloc_mem_size -= i4_layer_luma_samples + i4_layer_uv_samples; + i4_total_fpel_mem_size += i4_layer_luma_samples + i4_layer_uv_samples; + + if(i8_alloc_mem_size < 0) + { + ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_PICBUF; + return IH264E_INSUFFICIENT_MEM_PICBUF; + } + } + + buf_ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_ref_buf_mgr, ps_pic_buf, i); + + if(0 != buf_ret) + { + ps_codec->i4_error_code = IH264E_BUF_MGR_ERROR; + return IH264E_BUF_MGR_ERROR; + } + + pu1_buf += (HPEL_PLANES_CNT - 1) * i4_total_fpel_mem_size; + ps_pic_buf++; + } + + return ret; +} + +/** +******************************************************************************* +* +* @brief +* Returns size of buffers for storing SVC input data +* +* @param[in] 
u1_num_spatial_layers +* Num Spatial Layers +* +* @param[in] d_spatial_res_ratio +* Resolution Ratio b/w spatial layers +* +* @param[in] u4_wd +* Input Width +* +* @param[in] u4_ht +* Input Height +* +* @returns Size of buffers +* +******************************************************************************* +*/ +UWORD32 isvce_get_svc_inp_buf_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, + UWORD32 u4_wd, UWORD32 u4_ht) +{ + padding_dims_t s_pad_dims; + + UWORD32 i; + UWORD8 u1_filter_padding_size_x, u1_filter_padding_size_y; + + UWORD32 u4_size = 0; + + isvce_get_downscaler_padding_dims(&s_pad_dims); + + u1_filter_padding_size_x = s_pad_dims.u1_left_pad_size + s_pad_dims.u1_right_pad_size; + + u1_filter_padding_size_y = s_pad_dims.u1_top_pad_size + s_pad_dims.u1_bottom_pad_size; + + for(i = 0; i < u1_num_spatial_layers; i++) + { + WORD32 i4_layer_luma_wd = ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, i)) + 0.99; + WORD32 i4_layer_luma_ht = ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, i)) + 0.99; + WORD32 i4_layer_luma_samples = + (ALIGN16(i4_layer_luma_wd) + PAD_WD + u1_filter_padding_size_x) * + (i4_layer_luma_ht + PAD_HT + u1_filter_padding_size_y); + WORD32 i4_layer_uv_wd = i4_layer_luma_wd; + WORD32 i4_layer_uv_ht = i4_layer_luma_ht / 2.0 + 0.99; + /* u1_filter_padding_size_x * 2 because U and V + both need same amount of padding */ + WORD32 i4_layer_uv_samples = + (ALIGN16(i4_layer_uv_wd) + PAD_WD + u1_filter_padding_size_x * 2) * + (i4_layer_uv_ht + PAD_HT + u1_filter_padding_size_y); + + u4_size += (i4_layer_luma_samples + i4_layer_uv_samples) * sizeof(UWORD8); + } + + return SVC_MAX_NUM_INP_FRAMES * u4_size; +} + +/** +******************************************************************************* +* +* @brief +* Function to initialize svc input buffers +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_mem_rec +* Pointer to memory allocated for input buffers +* 
*******************************************************************************
*/
void isvce_svc_inp_buf_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec)
{
    padding_dims_t s_pad_dims;

    WORD32 i, j;
    UWORD8 u1_filter_padding_size_x, u1_filter_padding_size_y;

    DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio;
    UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;
    UWORD32 u4_wd = ps_codec->s_cfg.u4_wd;
    UWORD32 u4_ht = ps_codec->s_cfg.u4_ht;
    UWORD8 *pu1_buf = ps_mem_rec->pv_base;
    /* Byte budget; only checked by the ASSERT at the end of the layer loop */
    WORD64 i8_alloc_mem_size =
        isvce_get_svc_inp_buf_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht);

    isvce_get_downscaler_padding_dims(&s_pad_dims);

    u1_filter_padding_size_x = s_pad_dims.u1_left_pad_size + s_pad_dims.u1_right_pad_size;

    u1_filter_padding_size_y = s_pad_dims.u1_top_pad_size + s_pad_dims.u1_bottom_pad_size;

    for(i = 0; i < SVC_MAX_NUM_INP_FRAMES; i++)
    {
        ps_codec->as_inp_list[i].s_svc_params = ps_codec->s_cfg.s_svc_params;

        /* Layers are carved from the highest index down to layer 0 */
        for(j = u1_num_spatial_layers - 1; j >= 0; j--)
        {
            WORD32 i4_layer_luma_wd =
                ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + 0.99;
            WORD32 i4_layer_luma_ht =
                ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + 0.99;
            WORD32 i4_layer_luma_samples =
                (ALIGN16(i4_layer_luma_wd) + PAD_WD + u1_filter_padding_size_x) *
                (i4_layer_luma_ht + PAD_HT + u1_filter_padding_size_y);
            WORD32 i4_layer_uv_wd = i4_layer_luma_wd;
            WORD32 i4_layer_uv_ht = i4_layer_luma_ht / 2.0 + 0.99;
            /* u1_filter_padding_size_x * 2 because U and V
            both need same amount of padding */
            WORD32 i4_layer_uv_samples =
                (ALIGN16(i4_layer_uv_wd) + PAD_WD + u1_filter_padding_size_x * 2) *
                (i4_layer_uv_ht + PAD_HT + u1_filter_padding_size_y);

            /*
             * Luma plane: pv_data points past the top padding rows and the
             * left padding columns (regular plus filter padding)
             */
            ps_codec->as_inp_list[i].as_layer_yuv_buf_props[j].as_component_bufs[Y].i4_data_stride =
                ALIGN16(i4_layer_luma_wd) + PAD_WD + u1_filter_padding_size_x;
            ps_codec->as_inp_list[i].as_layer_yuv_buf_props[j].as_component_bufs[Y].pv_data =
                pu1_buf +
                ps_codec->as_inp_list[i]
                        .as_layer_yuv_buf_props[j]
                        .as_component_bufs[Y]
                        .i4_data_stride *
                    (PAD_TOP + s_pad_dims.u1_top_pad_size) +
                (PAD_LEFT + s_pad_dims.u1_left_pad_size);
            pu1_buf += i4_layer_luma_samples * sizeof(UWORD8);
            i8_alloc_mem_size -= i4_layer_luma_samples * sizeof(UWORD8);

            /*
             * Interleaved chroma plane follows the luma plane; the left
             * filter padding is doubled here
             */
            ps_codec->as_inp_list[i]
                .as_layer_yuv_buf_props[j]
                .as_component_bufs[UV]
                .i4_data_stride = ALIGN16(i4_layer_uv_wd) + PAD_WD + u1_filter_padding_size_x * 2;
            ps_codec->as_inp_list[i].as_layer_yuv_buf_props[j].as_component_bufs[UV].pv_data =
                pu1_buf +
                ps_codec->as_inp_list[i]
                        .as_layer_yuv_buf_props[j]
                        .as_component_bufs[UV]
                        .i4_data_stride *
                    (PAD_TOP + s_pad_dims.u1_top_pad_size) +
                (PAD_LEFT + s_pad_dims.u1_left_pad_size * 2);
            pu1_buf += i4_layer_uv_samples * sizeof(UWORD8);
            i8_alloc_mem_size -= i4_layer_uv_samples * sizeof(UWORD8);

            /* Chroma is always stored interleaved */
            ps_codec->as_inp_list[i].as_layer_yuv_buf_props[j].as_component_bufs[V].pv_data = NULL;

            ps_codec->as_inp_list[i].as_layer_yuv_buf_props[j].u1_bit_depth = 8;
            ps_codec->as_inp_list[i].as_layer_yuv_buf_props[j].e_color_format = IV_YUV_420SP_UV;
            ps_codec->as_inp_list[i].as_layer_yuv_buf_props[j].u4_width = i4_layer_luma_wd;
            ps_codec->as_inp_list[i].as_layer_yuv_buf_props[j].u4_height = i4_layer_luma_ht;

            ASSERT(i8_alloc_mem_size >= 0);
        }
    }
}

/**
*******************************************************************************
*
* @brief
*  Sets the width and height of every spatial layer of an input buffer
*
* @par Description:
*  The width and height of layer 'i' are the raw input luma width and height
*  divided by ratio^(num_spatial_layers - 1 - i), with 0.99 added before the
*  value is stored in the u4_width / u4_height fields. The layer with the
*  highest index therefore gets the raw input dimensions.
*
* @param[in,out] ps_inp
*  Pointer to svc input buffer; s_svc_params and s_inp_props.s_raw_buf are
*  read, as_layer_yuv_buf_props[].u4_width and .u4_height are written
*
*******************************************************************************
*/
void isvce_init_svc_dimension(isvce_inp_buf_t *ps_inp)
{
    WORD32 i;

    UWORD8 u1_num_spatial_layers = ps_inp->s_svc_params.u1_num_spatial_layers;
    DOUBLE d_spatial_res_ratio = ps_inp->s_svc_params.d_spatial_res_ratio;
    UWORD32 u4_wd = ps_inp->s_inp_props.s_raw_buf.au4_wd[Y];
    UWORD32 u4_ht = ps_inp->s_inp_props.s_raw_buf.au4_ht[Y];

    for(i = 0; i < u1_num_spatial_layers; i++)
    {
        ps_inp->as_layer_yuv_buf_props[i].u4_width =
            ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
        ps_inp->as_layer_yuv_buf_props[i].u4_height =
            ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
    }
}

/**
*******************************************************************************
*
* @brief
*  Pads input buf as assumed by the downscaler filter
*
* @par Description:
*  The luma and the interleaved chroma plane of the given layer are padded
*  left and right first, then top and bottom. The top and bottom calls start
*  at the left padding and span the width plus the horizontal padding.
*
* @param[in] ps_codec
*  Pointer to codec ctxt
*
* @param[in] ps_inp
*  Pointer to svc input buffer
*
* @param[in] u1_svc_layer_index
*  SVC layer index of the buffer
*
*******************************************************************************
*/

static void isvce_pad_buf_for_filtering(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp,
                                        UWORD8 u1_svc_layer_index)
{
    padding_dims_t s_pad_dims;

    UWORD8 *pu1_buf;
    UWORD32 u4_buf_width, u4_buf_height;

    UWORD8 u1_pad_left_size;
    UWORD8 u1_pad_right_size;
    UWORD8 u1_pad_top_size;
    UWORD8 u1_pad_bottom_size;
    UWORD8 u1_filter_padding_size_x;
    UWORD8 u1_filter_padding_size_chroma_x;

    ASSERT(ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].e_color_format == IV_YUV_420SP_UV);

    isvce_get_downscaler_padding_dims(&s_pad_dims);

    u1_pad_left_size = s_pad_dims.u1_left_pad_size;
    u1_pad_right_size = s_pad_dims.u1_right_pad_size;
    u1_pad_top_size = s_pad_dims.u1_top_pad_size;
    u1_pad_bottom_size = s_pad_dims.u1_bottom_pad_size;
    u1_filter_padding_size_x = u1_pad_left_size + u1_pad_right_size;
    /* Horizontal chroma padding is twice the luma padding */
    u1_filter_padding_size_chroma_x = u1_filter_padding_size_x * 2;

    u4_buf_width = ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].u4_width;

    u4_buf_height = ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].u4_height;

    /* Luma, left edge */
    pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index]
                              .as_component_bufs[0]
                              .pv_data);

    ps_codec->pf_pad_left_luma(
        pu1_buf,
        ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].as_component_bufs[0].i4_data_stride,
        u4_buf_height, u1_pad_left_size);

    /* Luma, right edge: start one past the last pixel of the first row */
    pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index]
                              .as_component_bufs[0]
                              .pv_data);

    pu1_buf += u4_buf_width;

    ps_codec->pf_pad_right_luma(
        pu1_buf,
        ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].as_component_bufs[0].i4_data_stride,
        u4_buf_height, u1_pad_right_size);

    /* Chroma, left edge: half the rows, twice the padding */
    pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index]
                              .as_component_bufs[1]
                              .pv_data);

    ps_codec->pf_pad_left_chroma(
        pu1_buf,
        ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].as_component_bufs[1].i4_data_stride,
        u4_buf_height / 2, u1_pad_left_size * 2);

    /* Chroma, right edge */
    pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index]
                              .as_component_bufs[1]
                              .pv_data);

    pu1_buf += u4_buf_width;

    ps_codec->pf_pad_right_chroma(
        pu1_buf,
        ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].as_component_bufs[1].i4_data_stride,
        u4_buf_height / 2, u1_pad_right_size * 2);

    /* Luma, top edge: start at the left padding of the first row */
    pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index]
                              .as_component_bufs[0]
                              .pv_data) -
              u1_pad_left_size;

    ps_codec->pf_pad_top(
        pu1_buf,
        ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].as_component_bufs[0].i4_data_stride,
        (u4_buf_width + u1_filter_padding_size_x), u1_pad_top_size);

    /* Luma, bottom edge: start at the left padding of the row after the last */
    pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index]
                              .as_component_bufs[0]
                              .pv_data) -
              u1_pad_left_size;

    pu1_buf +=
        (u4_buf_height *
         ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].as_component_bufs[0].i4_data_stride);

    ps_codec->pf_pad_bottom(
        pu1_buf,
        ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].as_component_bufs[0].i4_data_stride,
        (u4_buf_width + u1_filter_padding_size_x), u1_pad_bottom_size);

    /* Chroma, top edge */
    pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index]
                              .as_component_bufs[1]
                              .pv_data) -
              u1_pad_left_size * 2;

    ps_codec->pf_pad_top(
        pu1_buf,
        ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].as_component_bufs[1].i4_data_stride,
        (u4_buf_width + u1_filter_padding_size_chroma_x), u1_pad_top_size);

    /* Chroma, bottom edge */
    pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index]
                              .as_component_bufs[1]
                              .pv_data) -
              u1_pad_left_size * 2;

    pu1_buf +=
        ((u4_buf_height / 2) *
         ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].as_component_bufs[1].i4_data_stride);

    ps_codec->pf_pad_bottom(
        pu1_buf,
        ps_inp->as_layer_yuv_buf_props[u1_svc_layer_index].as_component_bufs[1].i4_data_stride,
        (u4_buf_width + u1_filter_padding_size_chroma_x), u1_pad_bottom_size);
}

/**
*******************************************************************************
*
* @brief
*  Pads raw input to satisfy SVC compliant input dimensions
*
* @param[in] ps_codec
*  Pointer to codec ctxt
*
* @param[in] ps_inp
*  Pointer to svc input buffer
*
*******************************************************************************
*/

static void isvce_pad_input_to_svc_compliant_dims(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp)
{
    UWORD8 *pu1_buf;
    UWORD32 u4_raw_input_wd, u4_raw_input_ht, u4_padded_width, u4_padded_height, u4_width_delta,
        u4_height_delta;
    UWORD8 u1_num_layers = ps_inp->s_svc_params.u1_num_spatial_layers;

    ASSERT(ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].e_color_format == IV_YUV_420SP_UV);

    u4_padded_width = ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].u4_width;
    u4_padded_height = ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].u4_height;
    u4_raw_input_wd = ps_inp->s_inp_props.s_raw_buf.au4_wd[0];
    u4_raw_input_ht = ps_inp->s_inp_props.s_raw_buf.au4_ht[0];
    u4_width_delta = u4_padded_width - u4_raw_input_wd;
    u4_height_delta = u4_padded_height - u4_raw_input_ht;

    ASSERT(!(u4_width_delta & 1));
    ASSERT(!(u4_height_delta & 1));

    if(u4_width_delta)
    {
        pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1]
                                  .as_component_bufs[0]
                                  .pv_data);

        pu1_buf += ((u4_width_delta / 2) + (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1]
                                                .as_component_bufs[0]
                                                .i4_data_stride) *
                                               (u4_height_delta / 2));

ps_codec->pf_pad_left_luma( + pu1_buf, + ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].as_component_bufs[0].i4_data_stride, + u4_padded_height, u4_width_delta / 2); + + pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[0] + .pv_data); + + pu1_buf += ((u4_width_delta / 2) + (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[0] + .i4_data_stride) * + (u4_height_delta / 2)); + + pu1_buf += u4_raw_input_wd; + + ps_codec->pf_pad_right_luma( + pu1_buf, + ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].as_component_bufs[0].i4_data_stride, + u4_padded_height, u4_width_delta / 2); + + pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[1] + .pv_data); + + pu1_buf += ((u4_width_delta / 2) + (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[1] + .i4_data_stride) * + (u4_height_delta / 4)); + + ps_codec->pf_pad_left_chroma( + pu1_buf, + ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].as_component_bufs[1].i4_data_stride, + u4_padded_height / 2, u4_width_delta / 2); + + pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[1] + .pv_data); + + pu1_buf += ((u4_width_delta / 2) + (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[1] + .i4_data_stride) * + (u4_height_delta / 4)); + + pu1_buf += u4_raw_input_wd; + + ps_codec->pf_pad_right_chroma( + pu1_buf, + ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].as_component_bufs[1].i4_data_stride, + u4_padded_height / 2, u4_width_delta / 2); + } + + if(u4_height_delta) + { + pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[0] + .pv_data); + + pu1_buf += ((ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[0] + .i4_data_stride) * + (u4_height_delta / 2)); + + ps_codec->pf_pad_top( + pu1_buf, + ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].as_component_bufs[0].i4_data_stride, + 
u4_padded_width, u4_height_delta / 2); + + pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[0] + .pv_data); + + pu1_buf += ((ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[0] + .i4_data_stride) * + (u4_height_delta / 2)); + + pu1_buf += + (u4_raw_input_ht * + ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].as_component_bufs[0].i4_data_stride); + + ps_codec->pf_pad_bottom( + pu1_buf, + ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].as_component_bufs[0].i4_data_stride, + u4_padded_width, u4_height_delta / 2); + + pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[1] + .pv_data); + + pu1_buf += ((ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[1] + .i4_data_stride) * + (u4_height_delta / 4)); + + ps_codec->pf_pad_top( + pu1_buf, + ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].as_component_bufs[1].i4_data_stride, + u4_padded_width, u4_height_delta / 4); + + pu1_buf = (UWORD8 *) (ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[1] + .pv_data); + + pu1_buf += ((ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1] + .as_component_bufs[1] + .i4_data_stride) * + (u4_height_delta / 4)); + + pu1_buf += + ((u4_raw_input_ht / 2) * + ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].as_component_bufs[1].i4_data_stride); + + ps_codec->pf_pad_bottom( + pu1_buf, + ps_inp->as_layer_yuv_buf_props[u1_num_layers - 1].as_component_bufs[1].i4_data_stride, + u4_padded_width, u4_height_delta / 4); + } +} + +/** +******************************************************************************* +* +* @brief +* Format conversion and downsampling for deriving spatial layer inputs +* +* @param[in] ps_inp +* Pointer to input buffer +* +******************************************************************************* +*/ +void isvce_svc_inp_buf_populate(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp) +{ + yuv_buf_props_t s_src_buf_props, 
s_dst_buf_props; + + UWORD32 i; + UWORD32 u4_blk_x, u4_blk_y; + UWORD8 *pu1_planar_y, *pu1_planar_u, *pu1_planar_v, *pu1_semi_planar_y, *pu1_semi_planar_uv; + UWORD8 *pu1_src_luma, *pu1_src_chroma, *pu1_dst_luma, *pu1_dst_chroma; + UWORD32 u4_num_blocks_x, u4_num_blocks_y; + UWORD32 u4_scaled_block_wd, u4_scaled_block_ht; + UWORD32 u4_blk_wd_luma, u4_blk_ht_luma; + + downscaler_ctxt_t *ps_scaler = &ps_codec->s_scaler; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns; + + const UWORD8 u1_num_yuv_components_420sp = NUM_SP_COMPONENTS; + UWORD8 u1_num_spatial_layers = ps_inp->s_svc_params.u1_num_spatial_layers; + UWORD32 u4_padded_width = ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1].u4_width; + UWORD32 u4_padded_height = ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1].u4_height; + UWORD32 u4_raw_input_wd = ps_inp->s_inp_props.s_raw_buf.au4_wd[0]; + UWORD32 u4_raw_input_ht = ps_inp->s_inp_props.s_raw_buf.au4_ht[0]; + UWORD32 u4_width_delta = u4_padded_width - u4_raw_input_wd; + UWORD32 u4_height_delta = u4_padded_height - u4_raw_input_ht; + + ASSERT(!(u4_width_delta & 1)); + ASSERT(!(u4_height_delta & 1)); + + ASSERT((ps_inp->s_inp_props.s_raw_buf.e_color_fmt == IV_YUV_420P) || + (ps_inp->s_inp_props.s_raw_buf.e_color_fmt == IV_YUV_420SP_UV)); + + /* Check is input is valid */ + if(!(ps_inp->s_inp_props.s_raw_buf.apv_bufs[0])) + { + ASSERT(0); + + return; + } + + /* Convert the input into semi-planar in case of other formats */ + if(ps_inp->s_inp_props.s_raw_buf.e_color_fmt == IV_YUV_420P) + { + pu1_planar_y = (UWORD8 *) ps_inp->s_inp_props.s_raw_buf.apv_bufs[0]; + pu1_planar_u = (UWORD8 *) ps_inp->s_inp_props.s_raw_buf.apv_bufs[1]; + pu1_planar_v = (UWORD8 *) ps_inp->s_inp_props.s_raw_buf.apv_bufs[2]; + + pu1_semi_planar_y = (UWORD8 *) ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1] + .as_component_bufs[0] + .pv_data; + + pu1_semi_planar_uv = 
(UWORD8 *) ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1] + .as_component_bufs[1] + .pv_data; + + pu1_semi_planar_y += + ((u4_width_delta / 2) + (ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1] + .as_component_bufs[0] + .i4_data_stride) * + (u4_height_delta / 2)); + + pu1_semi_planar_uv += + ((u4_width_delta / 2) + (ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1] + .as_component_bufs[1] + .i4_data_stride) * + (u4_height_delta / 4)); + + ps_codec->pf_ih264e_conv_420p_to_420sp( + pu1_planar_y, pu1_planar_u, pu1_planar_v, pu1_semi_planar_y, pu1_semi_planar_uv, + ps_inp->s_inp_props.s_raw_buf.au4_ht[0], ps_inp->s_inp_props.s_raw_buf.au4_wd[0], + ps_inp->s_inp_props.s_raw_buf.au4_strd[0], ps_inp->s_inp_props.s_raw_buf.au4_strd[1], + ps_inp->s_inp_props.s_raw_buf.au4_strd[2], + ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1] + .as_component_bufs[0] + .i4_data_stride, + ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1] + .as_component_bufs[1] + .i4_data_stride, + 0); + } + else + { + UWORD32 u4_wd, u4_ht; + UWORD8 u1_comp; + UWORD32 au4_arr_dims[4]; + UWORD8 *pu1_src, *pu1_dst; + + au4_arr_dims[0] = ps_inp->s_inp_props.s_raw_buf.au4_wd[0]; + au4_arr_dims[1] = ps_inp->s_inp_props.s_raw_buf.au4_ht[0]; + au4_arr_dims[2] = ps_inp->s_inp_props.s_raw_buf.au4_wd[1]; + au4_arr_dims[3] = ps_inp->s_inp_props.s_raw_buf.au4_ht[1]; + + for(u1_comp = 0; u1_comp < u1_num_yuv_components_420sp; u1_comp++) + { + u4_wd = au4_arr_dims[u1_comp * 2]; + u4_ht = au4_arr_dims[(u1_comp * 2) + 1]; + + pu1_dst = (UWORD8 *) ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1] + .as_component_bufs[u1_comp] + .pv_data; + + pu1_dst += + ((u4_width_delta / 2) + (ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1] + .as_component_bufs[u1_comp] + .i4_data_stride) * + ((u4_height_delta / 2) / (u1_comp + 1))); + + pu1_src = ps_inp->s_inp_props.s_raw_buf.apv_bufs[u1_comp]; + + ps_mem_fxns->pf_copy_2d(pu1_dst, + 
ps_inp->as_layer_yuv_buf_props[u1_num_spatial_layers - 1] + .as_component_bufs[u1_comp] + .i4_data_stride, + pu1_src, ps_inp->s_inp_props.s_raw_buf.au4_strd[u1_comp], u4_wd, + u4_ht); + } + } + + /* Padding input to satisfy SVC constraints */ + isvce_pad_input_to_svc_compliant_dims(ps_codec, ps_inp); + + /* Downscaling */ + for(i = u1_num_spatial_layers - 1; i > 0; i--) + { + const UWORD32 u4_default_scaled_blk_wd = + gu4_downscaler_blk_size / ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio + 0.5; + const UWORD32 u4_default_scaled_blk_ht = + gu4_downscaler_blk_size / ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio + 0.5; + + isvce_pad_buf_for_filtering(ps_codec, ps_inp, i); + + s_src_buf_props = ps_inp->as_layer_yuv_buf_props[i]; + s_dst_buf_props = ps_inp->as_layer_yuv_buf_props[i - 1]; + + u4_num_blocks_x = + (s_src_buf_props.u4_width + (gu4_downscaler_blk_size - 1)) / gu4_downscaler_blk_size; + + u4_num_blocks_y = + (s_src_buf_props.u4_height + (gu4_downscaler_blk_size - 1)) / gu4_downscaler_blk_size; + + pu1_src_luma = (UWORD8 *) s_src_buf_props.as_component_bufs[Y].pv_data; + pu1_src_chroma = (UWORD8 *) s_src_buf_props.as_component_bufs[U].pv_data; + pu1_dst_luma = (UWORD8 *) s_dst_buf_props.as_component_bufs[Y].pv_data; + pu1_dst_chroma = (UWORD8 *) s_dst_buf_props.as_component_bufs[U].pv_data; + + for(u4_blk_x = 0; u4_blk_x < u4_num_blocks_x; u4_blk_x++) + { + for(u4_blk_y = 0; u4_blk_y < u4_num_blocks_y; u4_blk_y++) + { + u4_blk_wd_luma = isvce_get_downscaler_blk_dims(s_src_buf_props.u4_width, u4_blk_x, + gu4_downscaler_blk_size); + + u4_blk_ht_luma = isvce_get_downscaler_blk_dims(s_src_buf_props.u4_height, u4_blk_y, + gu4_downscaler_blk_size); + + u4_scaled_block_wd = isvce_get_downscaler_blk_dims( + s_dst_buf_props.u4_width, u4_blk_x, u4_default_scaled_blk_wd); + + u4_scaled_block_ht = isvce_get_downscaler_blk_dims( + s_dst_buf_props.u4_height, u4_blk_y, u4_default_scaled_blk_ht); + + s_src_buf_props.as_component_bufs[Y].pv_data = + pu1_src_luma + 
(u4_blk_x * gu4_downscaler_blk_size + + u4_blk_y * gu4_downscaler_blk_size * + s_src_buf_props.as_component_bufs[Y].i4_data_stride); + + s_src_buf_props.as_component_bufs[U].pv_data = + pu1_src_chroma + (u4_blk_x * gu4_downscaler_blk_size + + u4_blk_y * (gu4_downscaler_blk_size / 2) * + s_src_buf_props.as_component_bufs[U].i4_data_stride); + + s_dst_buf_props.as_component_bufs[Y].pv_data = + pu1_dst_luma + (u4_blk_x * u4_default_scaled_blk_wd + + u4_blk_y * u4_default_scaled_blk_ht * + s_dst_buf_props.as_component_bufs[Y].i4_data_stride); + + s_dst_buf_props.as_component_bufs[U].pv_data = + pu1_dst_chroma + (u4_blk_x * u4_default_scaled_blk_wd + + u4_blk_y * (u4_default_scaled_blk_ht / 2) * + s_dst_buf_props.as_component_bufs[U].i4_data_stride); + + ASSERT(!(u4_scaled_block_wd & 1)); + ASSERT(!(u4_scaled_block_ht & 1)); + + isvce_process_downscaler(ps_scaler, &s_src_buf_props, &s_dst_buf_props, + u4_blk_wd_luma, u4_blk_ht_luma); + } + } + } + + UNUSED(u4_scaled_block_wd); + UNUSED(u4_scaled_block_ht); +} + +/** +******************************************************************************* +* +* @brief +* calculates the greatest common divisor between the two parameters. 
+* +******************************************************************************* +*/ + +static DOUBLE isvce_get_GCD(DOUBLE a, DOUBLE b) +{ + if(b == 0) + { + return a; + } + + return isvce_get_GCD(b, fmod(a, b)); +} + +/** +******************************************************************************* +* +* @brief +* calculates the least common multiple between the two parameters +* +******************************************************************************* +*/ + +static DOUBLE isvce_get_LCM(DOUBLE a, DOUBLE b) { return (a / isvce_get_GCD(a, b)) * b; } + +/** +******************************************************************************* +* +* @brief +* sets the width and height in config structure to SVC compliant width and +* height +* +* @param[in] ps_cfg +* Pointer to config struct +* +* @param[in] u4_app_wd +* width of the YUV as read by the app +* +* @param[in] u4_app_ht +* height of the YUV as read by the app +* +******************************************************************************* +*/ + +void isvce_get_svc_compliant_dimensions(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor, + UWORD32 u4_wd, UWORD32 u4_ht, UWORD32 *pu4_svc_comp_wd, + UWORD32 *pu4_svc_comp_ht) +{ + DOUBLE d_scaling_factor_power_num_layers_minus1 = 0; + UWORD32 u4_constraint_offset = 0; + + d_scaling_factor_power_num_layers_minus1 = pow(d_scaling_factor, u1_num_spatial_layers - 1); + + if(fmod(16, d_scaling_factor_power_num_layers_minus1)) + { + u4_constraint_offset = + (UWORD32) isvce_get_LCM(16, d_scaling_factor_power_num_layers_minus1); + } + else + { + u4_constraint_offset = (UWORD32) (16 * d_scaling_factor_power_num_layers_minus1); + } + + if(u4_wd % u4_constraint_offset) + { + *pu4_svc_comp_wd = u4_wd - ((u4_wd) % u4_constraint_offset) + u4_constraint_offset; + } + else + { + *pu4_svc_comp_wd = u4_wd; + } + + if(u4_ht % u4_constraint_offset) + { + *pu4_svc_comp_ht = u4_ht - ((u4_ht) % u4_constraint_offset) + u4_constraint_offset; + } + else + { + 
*pu4_svc_comp_ht = u4_ht; + } +} + +/** +******************************************************************************* +* +* @brief +* Returns size of buffers for storing SVC layer nbr info +* +* @param[in] u1_num_spatial_layers +* Num Spatial Layers +* +* @param[in] d_spatial_res_ratio +* Resolution Ratio b/w spatial layers +* +* @param[in] u4_wd +* Input Width +* +* @returns Size of buffers +* +******************************************************************************* +*/ +UWORD32 isvce_get_svc_nbr_info_buf_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, + UWORD32 u4_wd, UWORD32 u4_ht) +{ + UWORD32 i; + + UWORD32 u4_size = 0; + + ASSERT(1 == MAX_CTXT_SETS); + + u4_size += MAX_PROCESS_CTXT * u1_num_spatial_layers * sizeof(nbr_info_t); + + for(i = 0; i < u1_num_spatial_layers; i++) + { + WORD32 i4_layer_luma_wd = ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, i)) + 0.99; + WORD32 i4_layer_luma_ht = ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, i)) + 0.99; + WORD32 i4_num_mbs_in_row = i4_layer_luma_wd / MB_SIZE; + WORD32 i4_num_mbs_in_col = i4_layer_luma_ht / MB_SIZE; + + /* ps_top_row_mb_info */ + u4_size += (i4_num_mbs_in_row + 1) * i4_num_mbs_in_col * sizeof(isvce_mb_info_t); + + /* ps_left_mb_info */ + u4_size += MAX_PROCESS_CTXT * sizeof(isvce_mb_info_t); + + /* ps_top_mb_intra_modes */ + u4_size += (i4_num_mbs_in_row + 1) * i4_num_mbs_in_col * sizeof(mb_intra_modes_t); + + /* ps_left_mb_intra_modes */ + u4_size += MAX_PROCESS_CTXT * sizeof(mb_intra_modes_t); + } + + return u4_size; +} + +/** +******************************************************************************* +* +* @brief +* Function to initialize svc nbr info buffers +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_mem_rec +* Pointer to memory allocated for input buffers +* +******************************************************************************* +*/ +void isvce_svc_nbr_info_buf_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec) +{ + WORD32 i, j; 
+ + DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio; + UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; + UWORD32 u4_wd = ps_codec->s_cfg.u4_wd; + UWORD32 u4_ht = ps_codec->s_cfg.u4_ht; + + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + WORD64 i8_alloc_mem_size = + isvce_get_svc_nbr_info_buf_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht); + + ASSERT(1 == MAX_CTXT_SETS); + + for(i = 0; i < MAX_PROCESS_CTXT; i++) + { + ps_codec->as_process[i].s_nbr_info_base.ps_layer_nbr_info = (nbr_info_t *) pu1_buf; + pu1_buf += u1_num_spatial_layers * + sizeof(ps_codec->as_process[i].s_nbr_info_base.ps_layer_nbr_info[0]); + i8_alloc_mem_size -= u1_num_spatial_layers * + sizeof(ps_codec->as_process[i].s_nbr_info_base.ps_layer_nbr_info[0]); + + for(j = u1_num_spatial_layers - 1; j >= 0; j--) + { + ps_codec->as_process[i].s_nbr_info_base.ps_layer_nbr_info[j].ps_left_mb_info = + (isvce_mb_info_t *) pu1_buf; + ps_codec->as_process[i].s_nbr_info.ps_left_mb_info = (isvce_mb_info_t *) pu1_buf; + pu1_buf += sizeof(ps_codec->as_process[i].s_nbr_info.ps_left_mb_info[0]); + i8_alloc_mem_size -= sizeof(ps_codec->as_process[i].s_nbr_info.ps_left_mb_info[0]); + + ps_codec->as_process[i].s_nbr_info_base.ps_layer_nbr_info[j].ps_left_mb_intra_modes = + (mb_intra_modes_t *) pu1_buf; + ps_codec->as_process[i].s_nbr_info.ps_left_mb_intra_modes = + (mb_intra_modes_t *) pu1_buf; + pu1_buf += sizeof(ps_codec->as_process[i].s_nbr_info.ps_left_mb_intra_modes[0]); + i8_alloc_mem_size -= + sizeof(ps_codec->as_process[i].s_nbr_info.ps_left_mb_intra_modes[0]); + } + + ASSERT(i8_alloc_mem_size >= 0); + } + + for(i = u1_num_spatial_layers - 1; i >= 0; i--) + { + isvce_mb_info_t *ps_top_mb_info; + mb_intra_modes_t *ps_top_intra_modes; + + WORD32 i4_layer_luma_wd = + ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers 
- 1 - i)) + 0.99; + WORD32 i4_num_mbs_in_row = i4_layer_luma_wd / MB_SIZE; + WORD32 i4_num_mbs_in_col = i4_layer_luma_ht / MB_SIZE; + + ps_top_mb_info = (isvce_mb_info_t *) pu1_buf; + pu1_buf += (i4_num_mbs_in_row + 1) * i4_num_mbs_in_col * sizeof(ps_top_mb_info[0]); + i8_alloc_mem_size -= + (i4_num_mbs_in_row + 1) * i4_num_mbs_in_col * sizeof(ps_top_mb_info[0]); + + ps_top_intra_modes = (mb_intra_modes_t *) pu1_buf; + pu1_buf += (i4_num_mbs_in_row + 1) * i4_num_mbs_in_col * sizeof(ps_top_intra_modes[0]); + i8_alloc_mem_size -= + (i4_num_mbs_in_row + 1) * i4_num_mbs_in_col * sizeof(ps_top_intra_modes[0]); + + for(j = 0; j < MAX_PROCESS_CTXT; j++) + { + ps_codec->as_process[j].s_nbr_info_base.ps_layer_nbr_info[i].ps_top_row_mb_info = + ps_top_mb_info; + ps_codec->as_process[j].s_nbr_info.ps_top_row_mb_info = NULL; + + ps_codec->as_process[j].s_nbr_info_base.ps_layer_nbr_info[i].ps_top_mb_intra_modes = + ps_top_intra_modes; + ps_codec->as_process[j].s_nbr_info.ps_top_mb_intra_modes = NULL; + } + + ASSERT(i8_alloc_mem_size >= 0); + } +} + +/** +******************************************************************************* +* +* @brief +* isvce_codec_t and proc_t initialisations for an Access Unit +* +* @par Description: +* Before beginning to encode the frame, the current function initializes all +* the ctxts (proc, entropy, me, ...) basing on the input configured params. +* It locates space for storing recon in the encoder picture buffer set, fetches +* reference frame from encoder picture buffer set. Calls RC pre-enc to get +* qp and pic type for the current frame. Queues proc jobs so that +* the other threads can begin encoding. In brief, this function sets up the +* tone for the entire encoder. 
+* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_inp_buf +* Pointer to input buffer context +* +* @returns error_status +* +* @remarks +* +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_svc_au_init(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp_buf) +{ + svc_au_buf_t *ps_cur_pic; + + WORD32 cur_mv_bank_buf_id; + WORD32 cur_pic_buf_id; + WORD32 ref_set_id; + WORD32 i, j; + + svc_au_data_t *ps_mv_buf = NULL; + svc_au_buf_t *aps_ref_pic[MAX_REF_PIC_CNT] = {NULL, NULL}; + svc_au_data_t *aps_mv_buf[MAX_REF_PIC_CNT] = {NULL, NULL}; + + IH264E_ERROR_T error_status = IH264E_SUCCESS; + PIC_TYPE_T *pic_type = &ps_codec->pic_type; + + UWORD32 u4_timestamp_high = ps_inp_buf->s_inp_props.u4_timestamp_high; + UWORD32 u4_timestamp_low = ps_inp_buf->s_inp_props.u4_timestamp_low; + WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS; + /* Diamond search Iteration Max Cnt */ + UWORD32 u4_num_layers = + (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) ? 
(NUM_LAYERS >> 2) : NUM_LAYERS; + UWORD32 u4_enable_fast_sad = ps_codec->s_cfg.u4_enable_fast_sad; + + if((PIC_I == *pic_type) || (PIC_IDR == *pic_type)) + { + ps_codec->i4_slice_type = ISLICE; + } + else if(PIC_P == *pic_type) + { + ps_codec->i4_slice_type = PSLICE; + } + else if(PIC_B == *pic_type) + { + ps_codec->i4_slice_type = BSLICE; + } + + ps_codec->u4_is_curr_frm_ref = 0; + ps_codec->u4_is_curr_frm_ref = (*pic_type != PIC_B); + + if(ps_codec->s_cfg.u4_enable_alt_ref && (*pic_type == PIC_P) && + (ps_codec->i4_pic_cnt % (ps_codec->s_cfg.u4_enable_alt_ref + 1))) + { + ps_codec->u4_is_curr_frm_ref = 0; + } + + ps_codec->u4_is_idr = 0; + + if(PIC_IDR == *pic_type) + { + ps_codec->u4_is_idr = 1; + + ps_codec->i4_frame_num = 0; + + ps_codec->i4_idr_pic_id++; + } + + ps_codec->u4_disable_deblock_level = 1; + + if(ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_0) + { + ps_codec->u4_disable_deblock_level = 0; + } + else if(ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_2) + { + if(ps_codec->u4_disable_deblock_level_cnt == DISABLE_DEBLOCK_INTERVAL || + ps_codec->i4_slice_type == ISLICE) + { + ps_codec->u4_disable_deblock_level = 0; + } + } + else if(ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_3) + { + if(ps_codec->i4_slice_type == ISLICE) + { + ps_codec->u4_disable_deblock_level = 0; + } + } + + if(ps_codec->u4_disable_deblock_level) + { + ps_codec->u4_disable_deblock_level_cnt++; + } + else + { + ps_codec->u4_disable_deblock_level_cnt = 0; + } + + if(ps_codec->u4_disable_deblock_level == 0) + { + if(ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE) + { + ps_codec->i4_error_code = IH264E_SLICE_TYPE_INPUT_INVALID; + + return IH264E_SLICE_TYPE_INPUT_INVALID; + } + } + + ps_codec->i4_error_code = IH264E_SUCCESS; + + if(ps_codec->i4_gen_header) + { + sps_t *ps_sps = NULL; + pps_t *ps_pps = NULL; + subset_sps_t *ps_subset_sps = NULL; + UWORD8 u1_profile_idc = IH264_PROFILE_BASELINE; + + 
if(ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].u1_spatial_layer_id > 0) + { + u1_profile_idc = IH264_SCALABLE_BASELINE; + } + + ps_sps = ps_codec->ps_sps_base; + isvce_populate_sps(ps_codec, ps_sps, 0, u1_profile_idc, ps_inp_buf, 0); + + ps_pps = ps_codec->ps_pps_base; + isvce_populate_pps(ps_codec, ps_pps, 0, 0, 0); + + for(i = 1; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++) + { + ps_subset_sps = ps_codec->ps_subset_sps_base + i; + isvce_populate_subset_sps(ps_codec, ps_subset_sps, i, ps_inp_buf, i); + + /* populate pps header */ + ps_pps = ps_codec->ps_pps_base + i; + isvce_populate_pps(ps_codec, ps_pps, i, i, i); + } + } + + if(IH264E_SUCCESS != + isvce_ref_list_refresh(ps_codec, aps_ref_pic, aps_mv_buf, &ref_set_id, pic_type[0])) + { + ps_codec->i4_error_code = IH264E_NO_FREE_PICBUF; + + return IH264E_NO_FREE_PICBUF; + } + + { + ps_mv_buf = (svc_au_data_t *) ih264_buf_mgr_get_next_free( + (buf_mgr_t *) ps_codec->pv_svc_au_data_store_mgr, &cur_mv_bank_buf_id); + + if(NULL == ps_mv_buf) + { + ps_codec->i4_error_code = IH264E_NO_FREE_MVBANK; + + return IH264E_NO_FREE_MVBANK; + } + + if(ps_codec->u4_is_curr_frm_ref) + { + ih264_buf_mgr_set_status(ps_codec->pv_svc_au_data_store_mgr, cur_mv_bank_buf_id, + BUF_MGR_REF); + } + + ps_mv_buf->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt; + ps_mv_buf->i4_buf_id = cur_mv_bank_buf_id; + } + + { + ps_cur_pic = (svc_au_buf_t *) ih264_buf_mgr_get_next_free( + (buf_mgr_t *) ps_codec->pv_ref_buf_mgr, &cur_pic_buf_id); + + if(NULL == ps_cur_pic) + { + ps_codec->i4_error_code = IH264E_NO_FREE_PICBUF; + + return IH264E_NO_FREE_PICBUF; + } + + if(ps_codec->u4_is_curr_frm_ref) + { + ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id, BUF_MGR_REF); + } + + if(1 == ps_codec->s_cfg.u4_enable_recon) + { + ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id, BUF_MGR_IO); + } + + ps_cur_pic->u4_timestamp_high = ps_inp_buf->s_inp_props.u4_timestamp_high; + ps_cur_pic->u4_timestamp_low = 
ps_inp_buf->s_inp_props.u4_timestamp_low; + + ps_cur_pic->i4_abs_poc = ps_codec->i4_poc; + ps_cur_pic->i4_poc_lsb = ps_codec->i4_pic_order_cnt_lsb; + ps_cur_pic->i4_frame_num = ps_codec->i4_frame_num; + + ps_cur_pic->i4_buf_id = cur_pic_buf_id; + + ps_cur_pic->i1_temporal_id = isvce_svc_temporal_id_compute( + ps_codec->i4_poc, ps_codec->s_cfg.s_svc_params.u1_num_temporal_layers, pic_type[0]); + } + + /* + * Add the current picture to ref list independent of the fact that it is used + * as reference or not. This is because, now recon is not in sync with output + * hence we may need the current recon after some delay. By adding it to ref + * list we can retrieve the recon any time we want. The information that it is + * used for ref can still be found by checking the buffer status of pic buf. + */ + ps_codec->as_ref_set[ref_set_id].i4_pic_cnt = ps_codec->i4_pic_cnt; + ps_codec->as_ref_set[ref_set_id].i4_poc = ps_codec->i4_poc; + ps_codec->as_ref_set[ref_set_id].ps_svc_au_data = ps_mv_buf; + ps_codec->as_ref_set[ref_set_id].ps_pic_buf = ps_cur_pic; + + ps_codec->s_svc_ilp_data.ps_svc_au_data = ps_mv_buf; + + { + isvce_process_ctxt_t *ps_proc = NULL; + + j = ctxt_sel * MAX_PROCESS_THREADS; + + for(i = j; i < (j + MAX_PROCESS_THREADS); i++) + { + ps_proc = &ps_codec->as_process[i]; + + ps_proc->s_svc_params = ps_codec->s_cfg.s_svc_params; + + ps_proc->i4_frame_num = ps_codec->i4_frame_num; + ps_proc->u4_is_idr = ps_codec->u4_is_idr; + ps_proc->u4_idr_pic_id = ps_codec->i4_idr_pic_id; + ps_proc->i4_slice_type = ps_codec->i4_slice_type; + + ps_proc->u4_half_x_offset = 0; + ps_proc->u4_half_y_offset = 0; + ps_proc->u4_half_xy_offset = 0; + + ps_proc->u4_disable_deblock_level = ps_codec->u4_disable_deblock_level; + + ps_proc->i4_cur_mv_bank_buf_id = cur_mv_bank_buf_id; + ps_proc->ps_cur_pic = ps_cur_pic; + ps_proc->ps_cur_mv_buf = ps_mv_buf; + + /* + * pointer to ref picture + * 0 : Temporal back reference + * 1 : Temporal forward reference + */ + ps_proc->aps_ref_pic[L0] = 
aps_ref_pic[L0]; + ps_proc->aps_ref_pic[L1] = aps_ref_pic[L1]; + if(ps_codec->pic_type == PIC_B) + { + ps_proc->aps_mv_buf[L0] = aps_mv_buf[L0]; + ps_proc->aps_mv_buf[L1] = aps_mv_buf[L1]; + } + else + { + /* + * Else is dummy since for non B pic we does not need this + * But an assignment here will help in not having a segfault + * when we calcualte colpic in P slices + */ + ps_proc->aps_mv_buf[L0] = ps_mv_buf; + ps_proc->aps_mv_buf[L1] = ps_mv_buf; + } + + ps_proc->s_inp_buf = ps_inp_buf[0]; + + ps_proc->i4_encode_api_call_cnt = ps_codec->i4_encode_api_call_cnt; + + ps_proc->i4_pic_cnt = ps_codec->i4_pic_cnt; + + ps_proc->i4_error_code = 0; + + { + isvce_entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; + + ps_entropy->i4_sof = 0; + ps_entropy->i4_eof = 0; + ps_entropy->ps_sps_base = ps_codec->ps_sps_base; + ps_entropy->ps_pps_base = ps_codec->ps_pps_base; + ps_entropy->pu1_slice_idx = ps_proc->pu1_slice_idx; + ps_entropy->ps_svc_nalu_ext_base = ps_proc->ps_svc_nalu_ext_base; + ps_entropy->ps_subset_sps_base = ps_proc->ps_subset_sps_base; + ps_entropy->ps_slice_hdr_base = ps_proc->ps_slice_hdr_base; + ps_entropy->ps_svc_slice_hdr_base = ps_proc->ps_svc_slice_hdr_base; + ps_entropy->i4_abs_pic_order_cnt = ps_codec->i4_poc; + + ps_entropy->i1_transform_8x8_mode_flag = 0; + + ps_entropy->i4_error_code = IH264E_SUCCESS; + ps_proc->s_entropy.u4_is_last = ps_inp_buf->s_inp_props.u4_is_last; + ps_proc->s_entropy.i4_pic_cnt = ps_codec->i4_pic_cnt; + + ps_entropy->u4_timestamp_low = u4_timestamp_low; + ps_entropy->u4_timestamp_high = u4_timestamp_high; + } + + { + isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt; + + ps_me_ctxt->ai2_srch_boundaries[0] = ps_codec->s_cfg.u4_srch_rng_x; + ps_me_ctxt->ai2_srch_boundaries[1] = ps_codec->s_cfg.u4_srch_rng_y; + + ps_me_ctxt->u4_half_x_offset = ps_proc->u4_half_x_offset; + ps_me_ctxt->u4_half_y_offset = ps_proc->u4_half_y_offset; + ps_me_ctxt->u4_half_xy_offset = ps_proc->u4_half_xy_offset; + + ps_me_ctxt->u4_enable_fast_sad = 
u4_enable_fast_sad;
+                ps_me_ctxt->u4_enable_hpel = ps_codec->s_cfg.u4_enable_hpel;
+                ps_me_ctxt->u4_num_layers = u4_num_layers;
+                ps_me_ctxt->u4_me_speed_preset = ps_codec->s_cfg.u4_me_speed_preset;
+
+                if((i == j) && (0 == ps_codec->i4_poc))
+                {
+                    isvce_init_mv_bits(ps_me_ctxt);
+                }
+            }
+
+            ps_proc->ps_ngbr_avbl = &(ps_proc->s_ngbr_avbl);
+        }
+    }
+
+    return error_status;
+}
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialises the quantisation parameters of all three planes (Y, U, V) of a
+*  process context for a given luma qp
+*
+* @par Description:
+*  Luma uses 'qp' directly; both chroma planes use gu1_qpc_fqpi[qp]. For every
+*  plane the function stores the qp, qp / 6 and qp % 6, derives the number of
+*  quantiser bits, selects the forward and inverse scale matrices for qp % 6,
+*  fills the threshold and weight matrices, picks the rounding constant
+*  (dead zone) and, when s_cfg.u4_enable_satqd is set, the nine SAD thresholds.
+*
+* @param[in] ps_proc
+*  Process context; ps_proc->ps_qp_params[Y..V] are written and
+*  ps_proc->i4_slice_type is read
+*
+* @param[in] qp
+*  Luma qp. It is used unchecked as an index into gu1_qpc_fqpi, so the caller
+*  must pass a valid index (presumably 0..51 - TODO confirm against callers)
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void isvce_init_quant_params(isvce_process_ctxt_t *ps_proc, WORD32 qp)
+{
+    isvce_codec_t *ps_codec = ps_proc->ps_codec;
+    /* quant params of the plane currently being initialised */
+    quant_params_t *ps_qp_params;
+
+    /* ptr to forward quant threshold matrix */
+    const UWORD16 *pu2_thres_mat = NULL;
+
+    /* ptr to forward scale matrix */
+    const UWORD16 *pu2_scale_mat = gu2_quant_scale_matrix_4x4;
+
+    /* ptr to inverse scale matrix */
+    const UWORD16 *pu2_iscale_mat = gau2_ih264_iquant_scale_matrix_4x4;
+
+    /* temp var */
+    UWORD32 u4_qp[3], u4_qp_div6, u4_qp_mod6;
+    COMPONENT_TYPE plane;
+    WORD32 i;
+    UWORD32 u4_satdq_t;
+    const UWORD16 *pu2_smat;
+
+    /********************************************************************/
+    /*            init quant params for all planes Y, U and V           */
+    /********************************************************************/
+    /* luma qp */
+    u4_qp[Y] = qp;
+
+    /* chroma qp
+     * TODO_LATER : U and V currently share one qp derived from the luma qp.
+     * If the chroma planes ever use different qp's, this needs to be
+     * corrected accordingly.
+     */
+    u4_qp[U] = gu1_qpc_fqpi[qp];
+    u4_qp[V] = gu1_qpc_fqpi[qp];
+
+    plane = Y;
+    while(plane <= V)
+    {
+        u4_qp_div6 = (u4_qp[plane] / 6);
+        u4_qp_mod6 = (u4_qp[plane] % 6);
+
+        ps_qp_params = ps_proc->ps_qp_params[plane];
+
+        /* mb qp */
+        ps_qp_params->u1_mb_qp = u4_qp[plane];
+
+        /* mb qp / 6 */
+        ps_qp_params->u1_qp_div = u4_qp_div6;
+
+        /* mb qp % 6 */
+        ps_qp_params->u1_qp_rem = u4_qp_mod6;
+
+        /* QP bits */
+        ps_qp_params->u1_qbits = QP_BITS_h264_4x4 + u4_qp_div6;
+
+        /* forward scale matrix : one 16 entry table per value of qp % 6 */
+        ps_qp_params->pu2_scale_mat = pu2_scale_mat + (u4_qp_mod6 * 16);
+
+        /* threshold matrix & weight for quantization */
+        pu2_thres_mat = gu2_forward_quant_threshold_4x4 + (u4_qp_mod6 * 16);
+        for(i = 0; i < 16; i++)
+        {
+            /* the shift count is unsigned, so this relies on qp / 6 <= 8 */
+            ps_qp_params->pu2_thres_mat[i] = pu2_thres_mat[i] >> (8 - u4_qp_div6);
+            ps_qp_params->pu2_weigh_mat[i] = 16;
+        }
+
+        /* qp dependent rounding constant */
+        ps_qp_params->u4_dead_zone = gu4_forward_quant_round_factor_4x4[u4_qp_div6];
+
+        /* slice dependent rounding constant : halved for everything except I / SI slices */
+        if(ps_proc->i4_slice_type != ISLICE && ps_proc->i4_slice_type != SISLICE)
+        {
+            ps_qp_params->u4_dead_zone >>= 1;
+        }
+
+        /* SATQD threshold for zero block prediction */
+        if(ps_codec->s_cfg.u4_enable_satqd)
+        {
+            pu2_smat = ps_qp_params->pu2_scale_mat;
+
+            u4_satdq_t = ((1 << (ps_qp_params->u1_qbits)) - ps_qp_params->u4_dead_zone);
+
+            ps_qp_params->pu2_sad_thrsh[0] = u4_satdq_t / MAX(pu2_smat[3], pu2_smat[11]);
+            ps_qp_params->pu2_sad_thrsh[1] = u4_satdq_t / MAX(pu2_smat[1], pu2_smat[9]);
+            ps_qp_params->pu2_sad_thrsh[2] = u4_satdq_t / pu2_smat[15];
+            ps_qp_params->pu2_sad_thrsh[3] = u4_satdq_t / pu2_smat[7];
+            ps_qp_params->pu2_sad_thrsh[4] = u4_satdq_t / MAX(pu2_smat[12], pu2_smat[14]);
+            ps_qp_params->pu2_sad_thrsh[5] = u4_satdq_t / MAX(pu2_smat[4], pu2_smat[6]);
+            ps_qp_params->pu2_sad_thrsh[6] = u4_satdq_t / pu2_smat[13];
+            ps_qp_params->pu2_sad_thrsh[7] = u4_satdq_t / pu2_smat[5];
+            ps_qp_params->pu2_sad_thrsh[8] =
+                u4_satdq_t / MAX(MAX3(pu2_smat[0], pu2_smat[2], pu2_smat[8]), pu2_smat[10]);
+        }
+
+        /* inverse scale matrix */
+        ps_qp_params->pu2_iscale_mat = pu2_iscale_mat + (u4_qp_mod6 * 16);
+
+        plane += 1;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Per spatial layer initialisation of all process contexts, followed by
+*  queueing of the processing jobs of that layer
+*
+* @par Description:
+*  For each of the MAX_PROCESS_THREADS process contexts the function selects
+*  the source and recon buffers of the given spatial layer, derives the layer
+*  dimensions in MBs, sets the frame qp and the quant params, resets the proc,
+*  deblk, me and entropy maps, builds the slice index map, picks up the
+*  reference buffers of the layer for non intra pictures, selects the entropy
+*  coding mode and initialises the sub picture RC layer variables.
+*  It then queues one CMD_PROCESS job per MB row, terminates the job queue and
+*  clears ps_codec->i4_gen_header.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] ps_inp_buf
+*  Pointer to input buffer context
+*
+* @param[in] u1_spatial_layer_id
+*  Spatial Layer ID; 0 => Base layer
+*
+* @returns IH264E_SUCCESS, or IH264E_FAIL when a job could not be queued (the
+*  queue error is then stored in ps_codec->i4_error_code)
+*
+* @remarks
+*  The layer width and height are asserted to be multiples of MB_SIZE
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_svc_layer_pic_init(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp_buf,
+                                        UWORD8 u1_spatial_layer_id)
+{
+    WORD32 i;
+
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+    IH264_ERROR_T ret = IH264_SUCCESS;
+    PIC_TYPE_T e_pic_type = ps_codec->pic_type;
+
+    ASSERT(MAX_CTXT_SETS == 1);
+
+    for(i = 0; i < MAX_PROCESS_THREADS; i++)
+    {
+        isvce_process_ctxt_t *ps_proc = &ps_codec->as_process[i];
+        isvce_entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+        isvce_deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+        isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+        svc_au_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
+        svc_au_buf_t *aps_ref_pic[MAX_REF_PIC_CNT] = {ps_proc->aps_ref_pic[L0],
+                                                      ps_proc->aps_ref_pic[L1]};
+
+        ps_proc->u1_spatial_layer_id = u1_spatial_layer_id;
+
+        ps_proc->s_src_pic_buf_props =
ps_inp_buf->as_layer_yuv_buf_props[u1_spatial_layer_id]; + + ps_proc->s_rec_pic_buf_props = ps_cur_pic->ps_layer_yuv_buf_props[u1_spatial_layer_id]; + + ASSERT(0 == (ps_inp_buf->as_layer_yuv_buf_props[u1_spatial_layer_id].u4_width % MB_SIZE)); + ASSERT(0 == (ps_inp_buf->as_layer_yuv_buf_props[u1_spatial_layer_id].u4_height % MB_SIZE)); + + ps_proc->i4_wd_mbs = + ps_inp_buf->as_layer_yuv_buf_props[u1_spatial_layer_id].u4_width / MB_SIZE; + ps_proc->i4_ht_mbs = + ps_inp_buf->as_layer_yuv_buf_props[u1_spatial_layer_id].u4_height / MB_SIZE; + + ps_proc->u1_frame_qp = ps_codec->au4_frame_qp[u1_spatial_layer_id]; + + ps_proc->u1_mb_qp = ps_proc->u1_frame_qp; + ps_entropy->ps_mb_qp_ctxt->u1_cur_mb_qp = ps_proc->u1_frame_qp; + + isvce_init_quant_params(ps_proc, ps_proc->u1_frame_qp); + + memset(&ps_proc->s_frame_info, 0, sizeof(frame_info_t)); + + /* row '-1' */ + memset(ps_proc->pu1_proc_map - ps_proc->i4_wd_mbs, 1, + ps_proc->i4_wd_mbs * sizeof(ps_proc->pu1_proc_map[0])); + + /* row 0 to ht in mbs */ + memset(ps_proc->pu1_proc_map, 0, + ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs * sizeof(ps_proc->pu1_proc_map[0])); + + /* row '-1' */ + memset(ps_proc->pu1_deblk_map - ps_proc->i4_wd_mbs, 1, + ps_proc->i4_wd_mbs * sizeof(ps_proc->pu1_deblk_map[0])); + + /* row 0 to ht in mbs */ + memset(ps_proc->pu1_deblk_map, 0, + ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs * sizeof(ps_proc->pu1_deblk_map[0])); + + /* row '-1' */ + memset(ps_proc->pu1_me_map - ps_proc->i4_wd_mbs, 1, + ps_proc->i4_wd_mbs * sizeof(ps_proc->pu1_me_map[0])); + + /* row 0 to ht in mbs */ + memset(ps_proc->pu1_me_map, 0, + ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs * sizeof(ps_proc->pu1_me_map[0])); + + if(IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode) + { + ps_codec->i4_air_pic_cnt = + (ps_codec->i4_air_pic_cnt + 1) % ps_codec->s_cfg.u4_air_refresh_period; + + if(!ps_codec->i4_air_pic_cnt) + { + memset(ps_proc->pu1_is_intra_coded, 0, + ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs * + 
sizeof(ps_proc->pu1_is_intra_coded[0])); + } + } + + if(ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_NONE) + { + memset(ps_proc->pu1_slice_idx, 0, + ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs * sizeof(ps_proc->pu1_slice_idx[0])); + } + else if(ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS) + { + UWORD8 *pu1_slice_idx = ps_proc->pu1_slice_idx; + WORD32 i4_mb_y = 0, slice_idx = 0, cnt; + + while(i4_mb_y < ps_proc->i4_ht_mbs) + { + if(i4_mb_y + (WORD32) ps_codec->s_cfg.u4_slice_param < ps_proc->i4_ht_mbs) + { + cnt = ps_codec->s_cfg.u4_slice_param * ps_proc->i4_wd_mbs; + i4_mb_y += ps_codec->s_cfg.u4_slice_param; + } + else + { + cnt = (ps_proc->i4_ht_mbs - i4_mb_y) * ps_proc->i4_wd_mbs; + i4_mb_y += (ps_proc->i4_ht_mbs - i4_mb_y); + } + + memset(pu1_slice_idx, slice_idx, cnt * sizeof(pu1_slice_idx[0])); + + slice_idx++; + pu1_slice_idx += cnt; + } + } + + if((e_pic_type != PIC_IDR) && (e_pic_type != PIC_I)) + { + ps_proc->as_ref_pic_buf_props[L0] = + aps_ref_pic[L0]->ps_layer_yuv_buf_props[u1_spatial_layer_id]; + ps_proc->as_ref_pic_buf_props[L1] = + aps_ref_pic[L1]->ps_layer_yuv_buf_props[u1_spatial_layer_id]; + } + + ps_entropy->i4_gen_header = ps_codec->i4_gen_header && (0 == u1_spatial_layer_id); + ps_entropy->i4_gen_subset_sps = + (ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers > 1) && ps_codec->i4_gen_header; + + /* row '-1' */ + memset(ps_entropy->pu1_entropy_map - ps_proc->i4_wd_mbs, 1, + ps_proc->i4_wd_mbs * sizeof(ps_entropy->pu1_entropy_map[0])); + + /* row 0 to ht in mbs */ + memset(ps_entropy->pu1_entropy_map, 0, + ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs * sizeof(ps_entropy->pu1_entropy_map[0])); + + isvce_init_cabac_table(ps_entropy); + + ps_entropy->i4_wd_mbs = ps_proc->i4_wd_mbs; + ps_entropy->i4_ht_mbs = ps_proc->i4_ht_mbs; + + ps_entropy->u1_entropy_coding_mode_flag = + ((ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers > 1) && (0 == u1_spatial_layer_id)) + ? 
CAVLC + : ps_codec->s_cfg.u4_entropy_coding_mode; + + ps_proc->s_entropy.pi4_mb_skip_run[0] = 0; + + ps_entropy->u4_header_bits[MB_TYPE_INTRA] = 0; + ps_entropy->u4_header_bits[MB_TYPE_INTER] = 0; + ps_entropy->u4_residue_bits[MB_TYPE_INTRA] = 0; + ps_entropy->u4_residue_bits[MB_TYPE_INTER] = 0; + + ps_entropy->u1_spatial_layer_id = ps_proc->u1_spatial_layer_id; + + ps_deblk->pu1_slice_idx = ps_proc->pu1_slice_idx; + + ps_me_ctxt->u1_mb_qp = ps_codec->au4_frame_qp[u1_spatial_layer_id]; + + { + UWORD8 u1_min_qp; + UWORD8 u1_max_qp; + + svc_sub_pic_rc_ctxt_t *ps_sub_pic_rc_ctxt = ps_proc->ps_sub_pic_rc_ctxt; + svc_sub_pic_rc_layer_variables_t *ps_layer_variables = + &ps_sub_pic_rc_ctxt->s_sub_pic_rc_variables.s_layer_variables; + + switch(ps_proc->i4_slice_type) + { + case ISLICE: + { + u1_min_qp = ps_codec->s_cfg.au4_i_qp_min[u1_spatial_layer_id]; + u1_max_qp = ps_codec->s_cfg.au4_i_qp_max[u1_spatial_layer_id]; + + break; + } + case PSLICE: + { + u1_min_qp = ps_codec->s_cfg.au4_p_qp_min[u1_spatial_layer_id]; + u1_max_qp = ps_codec->s_cfg.au4_p_qp_max[u1_spatial_layer_id]; + + break; + } + default: + { + u1_min_qp = ps_codec->s_cfg.au4_b_qp_min[u1_spatial_layer_id]; + u1_max_qp = ps_codec->s_cfg.au4_b_qp_max[u1_spatial_layer_id]; + + break; + } + } + + ps_layer_variables->i4_max_num_reference_frames = ps_codec->i4_max_num_reference_frames; + ps_layer_variables->i4_slice_type = ps_proc->i4_slice_type; + ps_layer_variables->i4_frame_num = ps_proc->i4_frame_num; + ps_layer_variables->u1_frame_qp = ps_proc->u1_frame_qp; + ps_layer_variables->u1_spatial_layer_id = u1_spatial_layer_id; + ps_layer_variables->u1_min_qp = u1_min_qp; + ps_layer_variables->u1_max_qp = u1_max_qp; + + isvce_sub_pic_rc_ctxt_layer_init(ps_proc->ps_sub_pic_rc_ctxt); + } + } + + { + job_t s_job; + + s_job.i4_cmd = CMD_PROCESS; + s_job.i2_mb_cnt = + ps_inp_buf->as_layer_yuv_buf_props[u1_spatial_layer_id].u4_width / MB_SIZE; + s_job.i2_mb_x = 0; + + for(i = 0; i < (WORD32) 
(ps_inp_buf->as_layer_yuv_buf_props[u1_spatial_layer_id].u4_height / + MB_SIZE); + i++) + { + s_job.i2_mb_y = i; + + ret = ih264_list_queue(ps_codec->pv_proc_jobq, &s_job, 1); + + if(ret != IH264_SUCCESS) + { + ps_codec->i4_error_code = ret; + + return IH264E_FAIL; + } + } + + /* Once all the jobs are queued, terminate the queue */ + /* Since the threads are created and deleted in each call, terminating + here is not an issue */ + ih264_list_terminate(ps_codec->pv_proc_jobq); + } + + ps_codec->i4_gen_header = 0; + + return error_status; +} + +/** +******************************************************************************* +* +* @brief initialize process context. +* +* @par Description: +* Before dispatching the current job to process thread, the process context +* associated with the job is initialized. Usually every job aims to encode one +* row of mb's. Basing on the row indices provided by the job, the process +* context's buffer ptrs, slice indices and other elements that are necessary +* during core-coding are initialized. 
+* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_init_layer_proc_ctxt(isvce_process_ctxt_t *ps_proc) +{ + WORD32 i4_mb_x, i4_mb_y; + + isvce_codec_t *ps_codec = ps_proc->ps_codec; + n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + isvce_deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; + isvce_bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); + svc_au_data_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; + + i4_mb_x = ps_proc->i4_mb_x; + i4_mb_y = ps_proc->i4_mb_y; + + ASSERT((ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd) == 0); + ASSERT((ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht) == 0); + + ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs; + ps_proc->u4_nmb_me = 1; + + ps_proc->s_src_buf_props = ps_proc->s_src_pic_buf_props; + ps_proc->s_rec_buf_props = ps_proc->s_rec_pic_buf_props; + ps_proc->as_ref_buf_props[0] = ps_proc->as_ref_pic_buf_props[0]; + ps_proc->as_ref_buf_props[1] = ps_proc->as_ref_pic_buf_props[1]; + + ps_proc->s_src_buf_props.as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + (i4_mb_x * MB_SIZE) + + ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride * (i4_mb_y * MB_SIZE); + ps_proc->s_src_buf_props.as_component_bufs[1].pv_data = + ((UWORD8 *) ps_proc->s_src_pic_buf_props.as_component_bufs[1].pv_data) + + (i4_mb_x * MB_SIZE) + + ps_proc->s_src_buf_props.as_component_bufs[1].i4_data_stride * (i4_mb_y * BLK8x8SIZE); + + ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + (i4_mb_x * MB_SIZE) + + ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride * (i4_mb_y * MB_SIZE); + ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data = + ((UWORD8 *) 
ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data) + (i4_mb_x * MB_SIZE) + + ps_proc->s_rec_buf_props.as_component_bufs[1].i4_data_stride * (i4_mb_y * BLK8x8SIZE); + + ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data) + + (i4_mb_x * MB_SIZE) + + ps_proc->as_ref_buf_props[0].as_component_bufs[0].i4_data_stride * (i4_mb_y * MB_SIZE); + ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data) + + (i4_mb_x * MB_SIZE) + + ps_proc->as_ref_buf_props[0].as_component_bufs[1].i4_data_stride * (i4_mb_y * BLK8x8SIZE); + + ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data) + + (i4_mb_x * MB_SIZE) + + ps_proc->as_ref_buf_props[1].as_component_bufs[0].i4_data_stride * (i4_mb_y * MB_SIZE); + ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data = + ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data) + + (i4_mb_x * MB_SIZE) + + ps_proc->as_ref_buf_props[1].as_component_bufs[1].i4_data_stride * (i4_mb_y * BLK8x8SIZE); + + ps_proc->pv_mb_coeff_data = + ((UWORD8 *) ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data; + + ps_proc->pv_mb_header_data = + ((UWORD8 *) ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data; + + ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x]; + + ps_proc->ps_mb_info = + ps_cur_mv_buf->ps_svc_layer_data[ps_proc->u1_spatial_layer_id].ps_mb_info + + i4_mb_y * ps_proc->i4_wd_mbs; + + ps_proc->ps_col_mb = + ps_proc->aps_mv_buf[1]->ps_svc_layer_data[ps_proc->u1_spatial_layer_id].ps_mb_info + + i4_mb_y * ps_proc->i4_wd_mbs; + + { + ps_proc->s_nbr_info.ps_top_row_mb_info = + ps_proc->s_nbr_info_base.ps_layer_nbr_info[ps_proc->u1_spatial_layer_id] + .ps_top_row_mb_info + + (i4_mb_x + (i4_mb_y - 1) * ps_proc->i4_wd_mbs); + 
+        ps_proc->s_nbr_info.ps_top_mb_intra_modes =
+            ps_proc->s_nbr_info_base.ps_layer_nbr_info[ps_proc->u1_spatial_layer_id]
+                .ps_top_mb_intra_modes +
+            (i4_mb_x + (i4_mb_y - 1) * ps_proc->i4_wd_mbs);
+    }
+
+    ps_proc->pu4_mb_pu_cnt =
+        ps_cur_mv_buf->ps_svc_layer_data[ps_proc->u1_spatial_layer_id].pu4_num_pus_in_mb +
+        (i4_mb_y * ps_proc->i4_wd_mbs);
+
+    ps_proc->ps_mb_info->u2_mb_type = I16x16;
+
+    ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
+
+    ps_proc->i4_mb_distortion = SHRT_MAX;
+
+    if(i4_mb_x == 0)
+    {
+        ps_proc->s_nbr_info.ps_left_mb_info[0].i4_mb_distortion = 0;
+    }
+
+    ps_proc->i4_mb_cost = INT_MAX;
+
+    ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
+    /* deblk lags the current mb proc by 1 row */
+    /* NOTE: Intra prediction has to happen with non deblocked samples used as
+     * reference */
+    /* Hence to deblk MB 0 of row 0, you have to wait till MB 0 of row 1 is
+     * encoded. */
+    /* For simplicity, we chose to lag deblocking by 1 row wrt proc */
+    ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
+
+    ps_deblk->s_rec_pic_buf_props = ps_proc->s_rec_pic_buf_props;
+
+    ps_bs->i4_mb_x = ps_proc->i4_mb_x;
+    ps_bs->i4_mb_y = ps_proc->i4_mb_y;
+
+    ps_n_mb_ctxt->i4_mb_x = 0;
+    ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
+    ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Returns size of buffers for storing SVC ILP data
+*
+* @par Description:
+*  With more than one spatial layer the size covers, per layer, one
+*  yuv_buf_props_t each for intra recon and residual, one
+*  layer_resampler_props_t each for Y and UV, a UWORD8 intra recon plane pair
+*  and a WORD16 residual plane pair. With a single layer only one residual
+*  descriptor and one WORD16 residual plane pair are accounted for.
+*  Layer 'i' has the input dimensions divided by
+*  d_spatial_res_ratio ^ (u1_num_spatial_layers - 1 - i); 0.99 is added before
+*  the value is truncated to an integer. Every plane is ALIGN16(width) + PAD_WD
+*  samples wide and height + PAD_HT rows high.
+*
+* @param[in] u1_num_spatial_layers
+*  Num Spatial Layers
+*
+* @param[in] d_spatial_res_ratio
+*  Resolution Ratio b/w spatial layers
+*
+* @param[in] u4_wd
+*  Input Width
+*
+* @param[in] u4_ht
+*  Input Height
+*
+* @returns Size of buffers, in bytes
+*
+* @remarks
+*  isvce_svc_ilp_buf_init carves up the memory in the same order and asserts
+*  that it does not consume more than this size; keep the two in sync
+*
+*******************************************************************************
+*/
+UWORD32 isvce_get_svc_ilp_buf_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio,
+                                   UWORD32 u4_wd, UWORD32 u4_ht)
+{
+    WORD32 i;
+
+    UWORD32 u4_size = 0;
+
+    if(u1_num_spatial_layers > 1)
+    {
+        /* ps_intra_recon_bufs : per layer buffer descriptors */
+        u4_size += u1_num_spatial_layers * sizeof(yuv_buf_props_t);
+
+        /* ps_residual_bufs : per layer buffer descriptors */
+        u4_size += u1_num_spatial_layers * sizeof(yuv_buf_props_t);
+
+        /* aps_layer_resampler_props[Y] */
+        u4_size += u1_num_spatial_layers * sizeof(layer_resampler_props_t);
+
+        /* aps_layer_resampler_props[UV] */
+        u4_size += u1_num_spatial_layers * sizeof(layer_resampler_props_t);
+
+        for(i = u1_num_spatial_layers - 1; i >= 0; i--)
+        {
+            WORD32 i4_layer_luma_wd =
+                ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
+            WORD32 i4_layer_luma_ht =
+                ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
+            WORD32 i4_layer_luma_samples =
+                (ALIGN16(i4_layer_luma_wd) + PAD_WD) * (i4_layer_luma_ht + PAD_HT);
+            WORD32 i4_layer_uv_wd = i4_layer_luma_wd; /* chroma plane is as wide as luma */
+            WORD32 i4_layer_uv_ht = i4_layer_luma_ht / 2.0 + 0.99;
+            WORD32 i4_layer_uv_samples =
+                (ALIGN16(i4_layer_uv_wd) + PAD_WD) * (i4_layer_uv_ht + PAD_HT);
+
+            /* ps_intra_recon_bufs : sample storage, one byte per sample */
+            u4_size += (i4_layer_luma_samples + i4_layer_uv_samples) * sizeof(UWORD8);
+
+            /* ps_residual_bufs : sample storage, one WORD16 per sample */
+            u4_size += (i4_layer_luma_samples + i4_layer_uv_samples) * sizeof(WORD16);
+        }
+    }
+    else
+    {
+        WORD32 i4_layer_luma_wd = u4_wd;
+        WORD32 i4_layer_luma_ht = u4_ht;
+        WORD32 i4_layer_luma_samples =
+            (ALIGN16(i4_layer_luma_wd) + PAD_WD) * (i4_layer_luma_ht + PAD_HT);
+        WORD32 i4_layer_uv_wd = i4_layer_luma_wd;
+        WORD32 i4_layer_uv_ht = i4_layer_luma_ht / 2.0 + 0.99;
+        WORD32 i4_layer_uv_samples = (ALIGN16(i4_layer_uv_wd) + PAD_WD) * (i4_layer_uv_ht + PAD_HT);
+
+        /* ps_residual_bufs : the single buffer descriptor */
+        u4_size += sizeof(yuv_buf_props_t);
+
+        /* ps_residual_bufs : sample storage, one WORD16 per sample */
+        u4_size += (i4_layer_luma_samples + i4_layer_uv_samples) * sizeof(WORD16);
+    }
+
+    return u4_size;
+}
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills the resampler properties (phase, shift, scale, offset, add, delta and
+*  MB dimensions) of one component of one spatial layer
+*
+* @param[out] ps_layer_props
+*  Resampler properties to be initialised
+*
+* @param[in] d_spatial_res_ratio
+*  Resolution ratio b/w this layer and its reference layer; the reference
+*  layer dimensions are derived as u4_wd and u4_ht divided by this ratio
+*
+* @param[in] u4_wd
+*  Luma width of this layer
+*
+* @param[in] u4_ht
+*  Luma height of this layer
+*
+* @param[in] u1_level_idc
+*  Level; values up to 30 use a fixed shift of 16
+*
+* @param[in] u1_is_chroma
+*  0 for luma, 1 for chroma; it is also used as a right shift on all
+*  dimensions
+*
+* @remarks
+*  All interlace related flags are local constants (field_pic_flag = 0,
+*  frame_mbs_only_flag = 1), hence the field specific branches are never taken
+*
+*******************************************************************************
+*/
+static void isvce_layer_resampler_props_init(layer_resampler_props_t *ps_layer_props,
+                                             DOUBLE d_spatial_res_ratio, UWORD32 u4_wd,
+                                             UWORD32 u4_ht, UWORD8 u1_level_idc,
+                                             UWORD8 u1_is_chroma)
+{
+    const UWORD8
u1_ref_layer_field_pic_flag = 0;
+    const UWORD8 u1_field_pic_flag = 0;
+    const UWORD8 u1_frame_mbs_only_flag = 1;
+    const UWORD8 u1_ref_layer_frame_mbs_only_flag = 1;
+    const UWORD8 u1_bot_field_flag = 0;
+    const WORD32 i4_scaled_ref_layer_left_offset = 0;
+    const WORD32 i4_scaled_ref_layer_top_offset = 0;
+    const WORD32 i4_ref_layer_chroma_phase_x_plus1 = 1;
+    const WORD32 i4_ref_layer_chroma_phase_y_plus1 = 1;
+    const WORD32 i4_chroma_phase_x_plus1 = 1;
+    const WORD32 i4_chroma_phase_y_plus1 = 1;
+    const WORD32 i4_sub_wd_chroma = 2;
+    const WORD32 i4_sub_ht_chroma = 2;
+    UWORD32 u4_ref_wd = (u4_wd / d_spatial_res_ratio); /* truncated towards zero */
+    UWORD32 u4_ref_ht = (u4_ht / d_spatial_res_ratio) * (1 + u1_ref_layer_field_pic_flag);
+    UWORD32 u4_scaled_wd = u4_wd;
+    UWORD32 u4_scaled_ht = u4_ht * (1 + u1_field_pic_flag);
+    /* u1_is_chroma (0 or 1) doubles as the shift that halves all chroma dimensions */
+    u4_ref_wd = u4_ref_wd >> u1_is_chroma;
+    u4_ref_ht = u4_ref_ht >> u1_is_chroma;
+    u4_scaled_wd = u4_scaled_wd >> u1_is_chroma;
+    u4_scaled_ht = u4_scaled_ht >> u1_is_chroma;
+    /* phases, sub sampling factors and MB dimensions of the component */
+    if(u1_is_chroma)
+    {
+        ps_layer_props->i4_refphase_x = i4_ref_layer_chroma_phase_x_plus1 - 1;
+        ps_layer_props->i4_refphase_y = i4_ref_layer_chroma_phase_y_plus1 - 1;
+        ps_layer_props->i4_phase_x = i4_chroma_phase_x_plus1 - 1;
+        ps_layer_props->i4_phase_y = i4_chroma_phase_y_plus1 - 1;
+        ps_layer_props->u4_sub_wd = i4_sub_wd_chroma;
+        ps_layer_props->u4_sub_ht = i4_sub_ht_chroma;
+        ps_layer_props->u4_mb_wd = MB_SIZE >> 1;
+        ps_layer_props->u4_mb_ht = MB_SIZE >> 1;
+    }
+    else
+    {
+        ps_layer_props->i4_refphase_x = 0;
+        ps_layer_props->i4_refphase_y = 0;
+        ps_layer_props->i4_phase_x = 0;
+        ps_layer_props->i4_phase_y = 0;
+        ps_layer_props->u4_sub_wd = 1;
+        ps_layer_props->u4_sub_ht = 1;
+        ps_layer_props->u4_mb_wd = MB_SIZE;
+        ps_layer_props->u4_mb_ht = MB_SIZE;
+    }
+    /* fixed point precision of the scale factors */
+    if(u1_level_idc <= 30)
+    {
+        ps_layer_props->u4_shift_x = 16;
+        ps_layer_props->u4_shift_y = 16;
+    }
+    else
+    {
+        ps_layer_props->u4_shift_x = 31 - isvcd_get_ceil_log2(u4_ref_wd);
+        ps_layer_props->u4_shift_y = 31 - isvcd_get_ceil_log2(u4_ref_ht);
+    }
+    /* never taken : both frame_mbs_only flags are the constant 1 */
+    if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag))
+    {
+        ps_layer_props->i4_phase_y = ps_layer_props->i4_phase_y + 4 * u1_bot_field_flag;
+
+        if(1 == u1_ref_layer_frame_mbs_only_flag)
+        {
+            ps_layer_props->i4_refphase_y = (2 * ps_layer_props->i4_refphase_y) + 2;
+        }
+        else
+        {
+            ps_layer_props->i4_refphase_y = ps_layer_props->i4_refphase_y + (4 * u1_bot_field_flag);
+        }
+    }
+    /* scale = reference size / scaled size, rounded, with 'shift' fractional bits */
+    ps_layer_props->u4_scale_x =
+        ((u4_ref_wd << ps_layer_props->u4_shift_x) + (u4_scaled_wd >> 1)) / (u4_scaled_wd);
+    ps_layer_props->u4_scale_y =
+        ((u4_ref_ht << ps_layer_props->u4_shift_y) + (u4_scaled_ht >> 1)) / (u4_scaled_ht);
+    /* the scaled reference layer offsets are the constant 0, so i4_offset_x / y become 0 */
+    ps_layer_props->i4_offset_x = i4_scaled_ref_layer_left_offset / ps_layer_props->u4_sub_wd;
+    ps_layer_props->i4_add_x =
+        (((u4_ref_wd * (2 + ps_layer_props->i4_phase_x)) << (ps_layer_props->u4_shift_x - 2)) +
+         (u4_scaled_wd >> 1)) /
+            u4_scaled_wd +
+        (1 << (ps_layer_props->u4_shift_x - 5));
+    ps_layer_props->i4_delta_x = 4 * (2 + ps_layer_props->i4_refphase_x);
+    /* always taken : both frame_mbs_only flags are the constant 1 */
+    if((1 == u1_frame_mbs_only_flag) && (1 == u1_ref_layer_frame_mbs_only_flag))
+    {
+        ps_layer_props->i4_offset_y = i4_scaled_ref_layer_top_offset / ps_layer_props->u4_sub_ht;
+        ps_layer_props->i4_add_y =
+            (((u4_ref_ht * (2 + ps_layer_props->i4_phase_y)) << (ps_layer_props->u4_shift_y - 2)) +
+             (u4_scaled_ht >> 1)) /
+                u4_scaled_ht +
+            (1 << (ps_layer_props->u4_shift_y - 5));
+        ps_layer_props->i4_delta_y = 4 * (2 + ps_layer_props->i4_refphase_y);
+    }
+    else
+    {
+        ps_layer_props->i4_offset_y =
+            i4_scaled_ref_layer_top_offset / (2 * ps_layer_props->u4_sub_ht);
+        ps_layer_props->i4_add_y =
+            (((u4_ref_ht * (2 + ps_layer_props->i4_phase_y)) << (ps_layer_props->u4_shift_y - 3)) +
+             (u4_scaled_ht >> 1)) /
+                u4_scaled_ht +
+            (1 << (ps_layer_props->u4_shift_y - 5));
+        ps_layer_props->i4_delta_y = 2 * (2 + ps_layer_props->i4_refphase_y);
+    }
+}
+
+/**
+*******************************************************************************
+*
+*
@brief +* Function to initialize svc ilp buffers +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_mem_rec +* Pointer to memory allocated for input buffers +* +******************************************************************************* +*/ +void isvce_svc_ilp_buf_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec) +{ + UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; + DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio; + UWORD32 u4_wd = ps_codec->s_cfg.u4_wd; + UWORD32 u4_ht = ps_codec->s_cfg.u4_ht; + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + WORD64 i8_alloc_mem_size = + isvce_get_svc_ilp_buf_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht); + + if(u1_num_spatial_layers > 1) + { + WORD32 i, j; + + ps_codec->s_svc_ilp_data.ps_intra_recon_bufs = (yuv_buf_props_t *) pu1_buf; + pu1_buf += u1_num_spatial_layers * sizeof(ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[0]); + i8_alloc_mem_size -= + u1_num_spatial_layers * sizeof(ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[0]); + + ps_codec->s_svc_ilp_data.ps_residual_bufs = (yuv_buf_props_t *) pu1_buf; + pu1_buf += u1_num_spatial_layers * sizeof(ps_codec->s_svc_ilp_data.ps_residual_bufs[0]); + i8_alloc_mem_size -= + u1_num_spatial_layers * sizeof(ps_codec->s_svc_ilp_data.ps_residual_bufs[0]); + + for(i = 0; i < NUM_SP_COMPONENTS; i++) + { + ps_codec->s_svc_ilp_data.aps_layer_resampler_props[i] = + (layer_resampler_props_t *) pu1_buf; + pu1_buf += u1_num_spatial_layers * + sizeof(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[i][0]); + i8_alloc_mem_size -= u1_num_spatial_layers * + sizeof(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[i][0]); + } + + ASSERT(i8_alloc_mem_size >= 0); + + for(i = u1_num_spatial_layers - 1; i >= 0; i--) + { + WORD32 i4_stride; + + WORD32 i4_layer_luma_wd = + ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; + WORD32 i4_layer_luma_ht = + ((DOUBLE) 
u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; + WORD32 i4_layer_luma_samples = + (ALIGN16(i4_layer_luma_wd) + PAD_WD) * (i4_layer_luma_ht + PAD_HT); + WORD32 i4_layer_uv_wd = i4_layer_luma_wd; + WORD32 i4_layer_uv_ht = i4_layer_luma_ht / 2.0 + 0.99; + WORD32 i4_layer_uv_samples = + (ALIGN16(i4_layer_uv_wd) + PAD_WD) * (i4_layer_uv_ht + PAD_HT); + + ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[i].u4_width = i4_layer_luma_wd; + ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[i].u4_height = i4_layer_luma_ht; + ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[i].e_color_format = IV_YUV_420SP_UV; + ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[i].u1_bit_depth = 8; + + i4_stride = ALIGN16(i4_layer_luma_wd) + PAD_WD; + ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[i].as_component_bufs[Y].pv_data = + pu1_buf + PAD_LEFT + PAD_TOP * i4_stride; + ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[i].as_component_bufs[Y].i4_data_stride = + ALIGN16(i4_layer_luma_wd) + PAD_WD; + pu1_buf += i4_layer_luma_samples * sizeof(UWORD8); + i8_alloc_mem_size -= i4_layer_luma_samples * sizeof(UWORD8); + + i4_stride = ALIGN16(i4_layer_uv_wd) + PAD_WD; + ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[i].as_component_bufs[UV].pv_data = + pu1_buf + PAD_LEFT + PAD_TOP * i4_stride; + ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[i].as_component_bufs[UV].i4_data_stride = + ALIGN16(i4_layer_uv_wd) + PAD_WD; + pu1_buf += i4_layer_uv_samples * sizeof(UWORD8); + i8_alloc_mem_size -= i4_layer_uv_samples * sizeof(UWORD8); + + ps_codec->s_svc_ilp_data.ps_residual_bufs[i].u4_width = i4_layer_luma_wd; + ps_codec->s_svc_ilp_data.ps_residual_bufs[i].u4_height = i4_layer_luma_ht; + ps_codec->s_svc_ilp_data.ps_residual_bufs[i].e_color_format = IV_YUV_420SP_UV; + ps_codec->s_svc_ilp_data.ps_residual_bufs[i].u1_bit_depth = 10; + + i4_stride = ALIGN16(i4_layer_luma_wd) + PAD_WD; + ps_codec->s_svc_ilp_data.ps_residual_bufs[i].as_component_bufs[Y].pv_data = + pu1_buf + (PAD_LEFT + PAD_TOP * i4_stride) * 
(sizeof(WORD16) / sizeof(pu1_buf[0])); + ps_codec->s_svc_ilp_data.ps_residual_bufs[i].as_component_bufs[Y].i4_data_stride = + i4_stride; + pu1_buf += i4_layer_luma_samples * sizeof(WORD16); + i8_alloc_mem_size -= i4_layer_luma_samples * sizeof(WORD16); + + i4_stride = ALIGN16(i4_layer_uv_wd) + PAD_WD; + ps_codec->s_svc_ilp_data.ps_residual_bufs[i].as_component_bufs[UV].pv_data = + pu1_buf + (PAD_LEFT + PAD_TOP * i4_stride) * (sizeof(WORD16) / sizeof(pu1_buf[0])); + ps_codec->s_svc_ilp_data.ps_residual_bufs[i].as_component_bufs[UV].i4_data_stride = + i4_stride; + pu1_buf += i4_layer_uv_samples * sizeof(WORD16); + i8_alloc_mem_size -= i4_layer_uv_samples * sizeof(WORD16); + + ps_codec->s_svc_ilp_data.ps_residual_bufs[i].as_component_bufs[V].pv_data = NULL; + + ASSERT(i8_alloc_mem_size >= 0); + + if(i >= 1) + { + for(j = 0; j < NUM_SP_COMPONENTS; j++) + { + isvce_layer_resampler_props_init( + &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[j][i], + d_spatial_res_ratio, i4_layer_luma_wd, i4_layer_luma_ht, + ps_codec->s_cfg.u4_max_level, ((COMPONENT_TYPE) j) == UV); + } + } + } + } + else + { + WORD32 i4_stride; + + WORD32 i4_layer_luma_wd = u4_wd; + WORD32 i4_layer_luma_ht = u4_ht; + WORD32 i4_layer_luma_samples = + (ALIGN16(i4_layer_luma_wd) + PAD_WD) * (i4_layer_luma_ht + PAD_HT); + WORD32 i4_layer_uv_wd = i4_layer_luma_wd; + WORD32 i4_layer_uv_ht = i4_layer_luma_ht / 2.0 + 0.99; + WORD32 i4_layer_uv_samples = (ALIGN16(i4_layer_uv_wd) + PAD_WD) * (i4_layer_uv_ht + PAD_HT); + + ps_codec->s_svc_ilp_data.ps_residual_bufs = (yuv_buf_props_t *) pu1_buf; + pu1_buf += sizeof(ps_codec->s_svc_ilp_data.ps_residual_bufs[0]); + i8_alloc_mem_size -= sizeof(ps_codec->s_svc_ilp_data.ps_residual_bufs[0]); + + ASSERT(i8_alloc_mem_size >= 0); + + ps_codec->s_svc_ilp_data.ps_residual_bufs[0].u4_width = i4_layer_luma_wd; + ps_codec->s_svc_ilp_data.ps_residual_bufs[0].u4_height = i4_layer_luma_ht; + ps_codec->s_svc_ilp_data.ps_residual_bufs[0].e_color_format = IV_YUV_420SP_UV; + 
ps_codec->s_svc_ilp_data.ps_residual_bufs[0].u1_bit_depth = 10;
+
+        i4_stride = ALIGN16(i4_layer_luma_wd) + PAD_WD;
+        ps_codec->s_svc_ilp_data.ps_residual_bufs[0].as_component_bufs[Y].pv_data =
+            pu1_buf + (PAD_LEFT + PAD_TOP * i4_stride) * (sizeof(WORD16) / sizeof(pu1_buf[0]));
+        ps_codec->s_svc_ilp_data.ps_residual_bufs[0].as_component_bufs[Y].i4_data_stride =
+            i4_stride;
+        pu1_buf += i4_layer_luma_samples * sizeof(WORD16);
+        i8_alloc_mem_size -= i4_layer_luma_samples * sizeof(WORD16);
+
+        i4_stride = ALIGN16(i4_layer_uv_wd) + PAD_WD;
+        ps_codec->s_svc_ilp_data.ps_residual_bufs[0].as_component_bufs[UV].pv_data =
+            pu1_buf + (PAD_LEFT + PAD_TOP * i4_stride) * (sizeof(WORD16) / sizeof(pu1_buf[0]));
+        ps_codec->s_svc_ilp_data.ps_residual_bufs[0].as_component_bufs[UV].i4_data_stride =
+            i4_stride;
+        pu1_buf += i4_layer_uv_samples * sizeof(WORD16);
+        i8_alloc_mem_size -= i4_layer_uv_samples * sizeof(WORD16);
+
+        ps_codec->s_svc_ilp_data.ps_residual_bufs[0].as_component_bufs[V].pv_data = NULL;
+
+        ASSERT(i8_alloc_mem_size >= 0);
+    }
+}
+/**
+*******************************************************************************
+*
+* @brief
+*  Builds a bit mask that tells which blocks of a residual MB are non zero
+*
+* @par Description:
+*  The MB is visited in raster order as MAX_TU_IN_MB blocks of
+*  MIN_TU_SIZE x MIN_TU_SIZE WORD16 samples. pf_nonzero_checker is called on
+*  each block, with the stride and the block width expressed in bytes, and its
+*  result is placed at bit gau1_raster_to_zscan_map[i] of the mask.
+*
+* @param[in] ps_mem_fxns
+*  Memory function table that provides pf_nonzero_checker
+*
+* @param[in] ps_comp_buf
+*  Residual buffer; pv_data must point to the first WORD16 sample of the MB
+*  and i4_data_stride is in units of WORD16 samples
+*
+* @returns the bit mask, indexed in z-scan order
+*
+* @remarks
+*  The result of pf_nonzero_checker is shifted in unmasked, so it is
+*  presumably 0 or 1 - TODO confirm against its implementations
+*
+*******************************************************************************
+*/
+static FORCEINLINE UWORD32 isvce_get_residual_csbf(mem_fxns_t *ps_mem_fxns,
+                                                   buffer_container_t *ps_comp_buf)
+{
+    WORD32 i;
+
+    UWORD32 u4_csbf = 0;
+
+    for(i = 0; i < MAX_TU_IN_MB; i++)
+    {
+        UWORD8 u1_zscan_idx = gau1_raster_to_zscan_map[i];
+        UWORD8 u1_offset_x = (i % MAX_TU_IN_MB_ROW) * MIN_TU_SIZE;
+        UWORD8 u1_offset_y = (i / MAX_TU_IN_MB_ROW) * MIN_TU_SIZE;
+        WORD16 *pi2_res = ((WORD16 *) ps_comp_buf->pv_data) + u1_offset_x +
+                          u1_offset_y * ps_comp_buf->i4_data_stride;
+        UWORD8 u1_cbf = ps_mem_fxns->pf_nonzero_checker(
+            (UWORD8 *) pi2_res, ps_comp_buf->i4_data_stride * (sizeof(WORD16) / sizeof(UWORD8)),
+            MIN_TU_SIZE * (sizeof(WORD16) / sizeof(UWORD8)), MIN_TU_SIZE);
+
+        u4_csbf |= (u1_cbf << u1_zscan_idx);
+    }
+
+    return u4_csbf;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to update svc ilp buffers after every MB
+*
+* @param[in] ps_proc
+*  Pointer to
process context
+*
+* @remarks
+*  With a single spatial layer only ps_mb_info->u4_res_csbp is cleared.
+*  Otherwise, for every layer below the topmost one, the intra recon ILP buffer
+*  receives a copy of the recon MB when the MB is intra and zeros when it is
+*  not. When ENABLE_RESIDUAL_PREDICTION is set, the residual ILP buffer of such
+*  a layer is zeroed for intra, PSKIP and BSKIP MBs of non I slices, and
+*  u4_res_csbp of all other MBs of layers above the base layer is computed
+*  from the luma residual of the MB.
+*
+*******************************************************************************
+*/
+void isvce_svc_ilp_buf_update(isvce_process_ctxt_t *ps_proc)
+{
+    isvce_codec_t *ps_codec = ps_proc->ps_codec;
+    svc_params_t *ps_svc_params = &ps_codec->s_cfg.s_svc_params;
+
+    UWORD8 u1_spatial_layer_id = ps_proc->u1_spatial_layer_id;
+
+    if(ps_svc_params->u1_num_spatial_layers > 1)
+    {
+        buffer_container_t s_src;
+        buffer_container_t s_dst;
+
+        WORD32 i;
+
+        svc_ilp_data_t *ps_svc_ilp_data = &ps_codec->s_svc_ilp_data;
+        isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
+        mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
+        yuv_buf_props_t *ps_residual_buf =
+            &ps_codec->s_svc_ilp_data.ps_residual_bufs[u1_spatial_layer_id];
+
+        WORD32 i4_mb_x = ps_proc->i4_mb_x;
+        WORD32 i4_mb_y = ps_proc->i4_mb_y;
+
+        ASSERT(ps_proc->s_rec_buf_props.e_color_format == IV_YUV_420SP_UV);
+        /* intra recon ILP buffer : maintained for every layer except the topmost one */
+        if(u1_spatial_layer_id < (ps_svc_params->u1_num_spatial_layers - 1))
+        {
+            if(ps_proc->ps_mb_info->u1_is_intra)
+            {
+                for(i = 0; i < NUM_SP_COMPONENTS; i++)
+                {
+                    UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i));
+                    /* used without an MB offset; presumably it already points to the current MB - TODO confirm */
+                    s_src = ps_proc->s_rec_buf_props.as_component_bufs[i];
+
+                    s_dst.i4_data_stride = ps_svc_ilp_data->ps_intra_recon_bufs[u1_spatial_layer_id]
+                                               .as_component_bufs[i]
+                                               .i4_data_stride;
+                    s_dst.pv_data =
+                        ((UWORD8 *) ps_svc_ilp_data->ps_intra_recon_bufs[u1_spatial_layer_id]
+                             .as_component_bufs[i]
+                             .pv_data) +
+                        i4_mb_x * MB_SIZE +
+                        i4_mb_y * (MB_SIZE >> u1_is_chroma) * s_dst.i4_data_stride;
+
+                    ps_mem_fxns->pf_copy_2d((UWORD8 *) s_dst.pv_data, s_dst.i4_data_stride,
+                                            (UWORD8 *) s_src.pv_data, s_src.i4_data_stride, MB_SIZE,
+                                            (MB_SIZE >> u1_is_chroma));
+                }
+            }
+            else
+            {
+                for(i = 0; i < NUM_SP_COMPONENTS; i++)
+                {
+                    UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i));
+                    /* non intra MB : zero the same region of the intra recon ILP buffer */
+                    s_dst.i4_data_stride = ps_svc_ilp_data->ps_intra_recon_bufs[u1_spatial_layer_id]
+                                               .as_component_bufs[i]
+                                               .i4_data_stride;
+                    s_dst.pv_data =
+                        ((UWORD8 *) ps_svc_ilp_data->ps_intra_recon_bufs[u1_spatial_layer_id]
+                             .as_component_bufs[i]
+                             .pv_data) +
+                        i4_mb_x * MB_SIZE +
+                        i4_mb_y * (MB_SIZE >> u1_is_chroma) * s_dst.i4_data_stride;
+
+                    ps_mem_fxns->pf_memset_2d((UWORD8 *) s_dst.pv_data, s_dst.i4_data_stride, 0,
+                                              MB_SIZE, (MB_SIZE >> u1_is_chroma));
+                }
+            }
+        }
+        /* residual ILP buffer : intra and skip MBs are zeroed here */
+        if(ENABLE_RESIDUAL_PREDICTION && (ps_proc->i4_slice_type != ISLICE) &&
+           (u1_spatial_layer_id < (ps_svc_params->u1_num_spatial_layers - 1)))
+        {
+            if(ps_proc->ps_mb_info->u1_is_intra || (ps_proc->ps_mb_info->u2_mb_type == PSKIP) ||
+               (ps_proc->ps_mb_info->u2_mb_type == BSKIP))
+            {
+                for(i = 0; i < NUM_SP_COMPONENTS; i++)
+                {
+                    buffer_container_t *ps_comp_buf;
+
+                    WORD16 *pi2_res;
+
+                    UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i));
+
+                    ps_comp_buf = &ps_residual_buf->as_component_bufs[u1_is_chroma ? UV : Y];
+                    pi2_res =
+                        ((WORD16 *) ps_comp_buf->pv_data) + ps_proc->i4_mb_x * MB_SIZE +
+                        ps_proc->i4_mb_y * (MB_SIZE >> u1_is_chroma) * ps_comp_buf->i4_data_stride;
+                    /* the stride and the width are passed in bytes as the samples are WORD16 */
+                    ps_mem_fxns->pf_memset_2d(
+                        (UWORD8 *) pi2_res,
+                        ps_comp_buf->i4_data_stride * (sizeof(WORD16) / sizeof(UWORD8)), 0,
+                        MB_SIZE * (sizeof(WORD16) / sizeof(UWORD8)), MB_SIZE >> u1_is_chroma);
+                }
+            }
+        }
+        /* coded sub block pattern of the luma residual; only for coded inter MBs above the base layer */
+        if(ENABLE_RESIDUAL_PREDICTION && (u1_spatial_layer_id > 0) &&
+           !(ps_proc->ps_mb_info->u1_is_intra || (ps_proc->ps_mb_info->u2_mb_type == PSKIP) ||
+             (ps_proc->ps_mb_info->u2_mb_type == BSKIP)))
+        {
+            s_src = ps_residual_buf->as_component_bufs[Y];
+            s_src.pv_data = ((WORD16 *) s_src.pv_data) + ps_proc->i4_mb_x * MB_SIZE +
+                            ps_proc->i4_mb_y * MB_SIZE * s_src.i4_data_stride;
+
+            ps_proc->ps_mb_info->u4_res_csbp = isvce_get_residual_csbf(ps_mem_fxns, &s_src);
+        }
+        else
+        {
+            ps_proc->ps_mb_info->u4_res_csbp = 0;
+        }
+    }
+    else
+    {
+        ps_proc->ps_mb_info->u4_res_csbp = 0;
+    }
+}
+
+/*
+ * Padding has a one MB row dependency on deblock which
+ * in turn has a one MB row dependency on encode
+ */
+static IH264E_ERROR_T isvce_pad_frame(isvce_process_ctxt_t *ps_proc, yuv_buf_props_t
*ps_pad_buf) +{ + /* codec context */ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + WORD32 i4_element_size = (ps_pad_buf->u1_bit_depth > 8) ? 2 : 1; + + /* src buffers luma */ + WORD32 i4_luma_stride = ps_pad_buf->as_component_bufs[0].i4_data_stride * i4_element_size; + UWORD8 *pu1_curr_pic_luma = (UWORD8 *) (ps_pad_buf->as_component_bufs[0].pv_data); + + /* src buffers chroma */ + WORD32 i4_chroma_stride = ps_pad_buf->as_component_bufs[1].i4_data_stride * i4_element_size; + UWORD8 *pu1_curr_pic_chroma = (UWORD8 *) (ps_pad_buf->as_component_bufs[1].pv_data); + + WORD32 i4_bottom_offset_luma = ps_pad_buf->u4_height * i4_luma_stride; + WORD32 i4_bottom_offset_chroma = (ps_pad_buf->u4_height >> 1) * i4_chroma_stride; + + /* Pad left */ + ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_luma_stride, ps_pad_buf->u4_height, + PAD_LEFT * i4_element_size); + ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_chroma_stride, ps_pad_buf->u4_height >> 1, + PAD_LEFT * i4_element_size); + + /* Pad right */ + ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + ps_pad_buf->u4_width * i4_element_size, + i4_luma_stride, ps_pad_buf->u4_height, PAD_RIGHT * i4_element_size); + ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + ps_pad_buf->u4_width * i4_element_size, + i4_chroma_stride, ps_pad_buf->u4_height >> 1, + PAD_RIGHT * i4_element_size); + + /* Pad top */ + ps_codec->pf_pad_top(pu1_curr_pic_luma - (PAD_LEFT * i4_element_size), i4_luma_stride, + (ps_pad_buf->u4_width + PAD_WD) * i4_element_size, PAD_TOP); + ps_codec->pf_pad_top(pu1_curr_pic_chroma - (PAD_LEFT * i4_element_size), i4_chroma_stride, + (ps_pad_buf->u4_width + PAD_WD) * i4_element_size, PAD_TOP >> 1); + + /* Pad bottom */ + ps_codec->pf_pad_bottom( + pu1_curr_pic_luma + i4_bottom_offset_luma - (PAD_LEFT * i4_element_size), i4_luma_stride, + (ps_pad_buf->u4_width + PAD_WD) * i4_element_size, PAD_BOT); + ps_codec->pf_pad_bottom( + pu1_curr_pic_chroma + i4_bottom_offset_chroma - (PAD_LEFT * i4_element_size), + 
i4_chroma_stride, (ps_pad_buf->u4_width + PAD_WD) * i4_element_size, PAD_BOT >> 1); + + return IH264E_SUCCESS; +} + +void isvce_svc_pad_frame(isvce_process_ctxt_t *ps_proc) +{ + isvce_codec_t *ps_codec = ps_proc->ps_codec; + + isvce_pad_frame(ps_proc, &(ps_proc->s_rec_pic_buf_props)); + + if(ps_proc->s_svc_params.u1_num_spatial_layers > 1) + { + isvce_pad_frame( + ps_proc, &(ps_codec->s_svc_ilp_data.ps_intra_recon_bufs[ps_proc->u1_spatial_layer_id])); + isvce_pad_frame(ps_proc, + &(ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id])); + } +} + +/** +******************************************************************************* +* +* @brief +* Initialize AIR mb frame Map +* +* @par Description: +* Initialize AIR mb frame map +* MB frame map indicates which frame an Mb should be coded as intra according +*to AIR +* +* @param[in] ps_codec +* Pointer to codec context +* +* @returns error_status +* +* @remarks +* +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_init_air_map(isvce_codec_t *ps_codec) +{ + /* intra refresh map */ + UWORD16 *pu2_intr_rfrsh_map = ps_codec->pu2_intr_rfrsh_map; + + /* air mode */ + IVE_AIR_MODE_T air_mode = ps_codec->s_cfg.e_air_mode; + + /* refresh period */ + UWORD32 air_period = ps_codec->s_cfg.u4_air_refresh_period; + + /* mb cnt */ + UWORD32 u4_mb_cnt = ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs; + + /* temp var */ + UWORD32 curr_mb, seed_rand = 1; + + switch(air_mode) + { + case IVE_AIR_MODE_CYCLIC: + + for(curr_mb = 0; curr_mb < u4_mb_cnt; curr_mb++) + { + pu2_intr_rfrsh_map[curr_mb] = curr_mb % air_period; + } + break; + + case IVE_AIR_MODE_RANDOM: + + for(curr_mb = 0; curr_mb < u4_mb_cnt; curr_mb++) + { + seed_rand = (seed_rand * 32719 + 3) % 32749; + pu2_intr_rfrsh_map[curr_mb] = seed_rand % air_period; + } + break; + + default: + + break; + } + + return IH264E_SUCCESS; +} + +/** 
+****************************************************************************** +* +* @brief +* derivation process for macroblock availability +* +* @par Description +* Calculates the availability of the left, top, topright and topleft macroblocks. +* +* @param[in] ps_proc_ctxt +* pointer to proc context (handle) +* +* @remarks Based on section 6.4.5 in H264 spec +* +* @return none +* +****************************************************************************** +*/ +void isvce_derive_nghbr_avbl_of_mbs(isvce_process_ctxt_t *ps_proc) +{ + UWORD8 *pu1_slice_idx_curr = ps_proc->pu1_slice_idx; + UWORD8 *pu1_slice_idx_b; + UWORD8 *pu1_slice_idx_a; + UWORD8 *pu1_slice_idx_c; + UWORD8 *pu1_slice_idx_d; + block_neighbors_t *ps_ngbr_avbl; + WORD32 i4_mb_x, i4_mb_y; + WORD32 i4_wd_mbs; + + i4_mb_x = ps_proc->i4_mb_x; + i4_mb_y = ps_proc->i4_mb_y; + + i4_wd_mbs = ps_proc->i4_wd_mbs; + + pu1_slice_idx_curr += (i4_mb_y * i4_wd_mbs) + i4_mb_x; + pu1_slice_idx_a = pu1_slice_idx_curr - 1; + pu1_slice_idx_b = pu1_slice_idx_curr - i4_wd_mbs; + pu1_slice_idx_c = pu1_slice_idx_b + 1; + pu1_slice_idx_d = pu1_slice_idx_b - 1; + ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + + /**********************************************************************/ + /* The macroblock is marked as available, unless one of the following */ + /* conditions is true in which case the macroblock shall be marked as */ + /* not available. */ + /* 1. mbAddr < 0 */ + /* 2 mbAddr > CurrMbAddr */ + /* 3. the macroblock with address mbAddr belongs to a different slice */ + /* than the macroblock with address CurrMbAddr */ + /**********************************************************************/ + + /* left macroblock availability */ + if(i4_mb_x == 0) + { /* macroblocks along first column */ + ps_ngbr_avbl->u1_mb_a = 0; + } + else + { /* macroblocks belong to same slice? 
*/ + if(*pu1_slice_idx_a != *pu1_slice_idx_curr) + ps_ngbr_avbl->u1_mb_a = 0; + else + ps_ngbr_avbl->u1_mb_a = 1; + } + + /* top macroblock availability */ + if(i4_mb_y == 0) + { /* macroblocks along first row */ + ps_ngbr_avbl->u1_mb_b = 0; + } + else + { /* macroblocks belong to same slice? */ + if(*pu1_slice_idx_b != *pu1_slice_idx_curr) + ps_ngbr_avbl->u1_mb_b = 0; + else + ps_ngbr_avbl->u1_mb_b = 1; + } + + /* top right macroblock availability */ + if(i4_mb_x == i4_wd_mbs - 1 || i4_mb_y == 0) + { /* macroblocks along last column */ + ps_ngbr_avbl->u1_mb_c = 0; + } + else + { /* macroblocks belong to same slice? */ + if(*pu1_slice_idx_c != *pu1_slice_idx_curr) + ps_ngbr_avbl->u1_mb_c = 0; + else + ps_ngbr_avbl->u1_mb_c = 1; + } + + /* top left macroblock availability */ + if(i4_mb_x == 0 || i4_mb_y == 0) + { /* macroblocks along first column */ + ps_ngbr_avbl->u1_mb_d = 0; + } + else + { /* macroblocks belong to same slice? */ + if(*pu1_slice_idx_d != *pu1_slice_idx_curr) + ps_ngbr_avbl->u1_mb_d = 0; + else + ps_ngbr_avbl->u1_mb_d = 1; + } +} + +/** +******************************************************************************* +* +* @brief +* Codec level initializations +* +* @par Description: +* Initializes the codec with parameters that needs to be set before encoding +* first frame +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_inp_buf +* Pointer to input buffer context +* +* @returns error_status +* +* @remarks +* +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_codec_init(isvce_codec_t *ps_codec) +{ + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns; + WORD8 i; + + /******************************************************************** + * INITIALIZE CODEC CONTEXT * + ********************************************************************/ + /* encoder presets */ + 
if(ps_codec->s_cfg.u4_enc_speed_preset != IVE_CONFIG) + { + if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) + { /* high quality */ + /* enable diamond search */ + ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH; + ps_codec->s_cfg.u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_codec->s_cfg.u4_enable_intra_4x4 = 1; + if(!FORCE_FAST_INTRA4X4) + { + ps_enc_loop_fxns->apf_luma_energy_compaction[1] = + isvce_code_luma_intra_macroblock_4x4_rdopt_on; + } + + /* sub pel off */ + ps_codec->s_cfg.u4_enable_hpel = 1; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + } + else if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_NORMAL) + { /* normal */ + /* enable diamond search */ + ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH; + ps_codec->s_cfg.u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_codec->s_cfg.u4_enable_intra_4x4 = 1; + + /* sub pel off */ + ps_codec->s_cfg.u4_enable_hpel = 1; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + } + else if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST) + { /* normal */ + /* enable diamond search */ + ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH; + ps_codec->s_cfg.u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_codec->s_cfg.u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_codec->s_cfg.u4_enable_hpel = 1; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 1; + } + else if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_HIGH_SPEED) + { /* fast */ + /* enable diamond search */ + ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH; + ps_codec->s_cfg.u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_codec->s_cfg.u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_codec->s_cfg.u4_enable_hpel = 0; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + } + else if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) + { /* fastest */ + /* enable diamond search */ + 
ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH; + + /* disable intra 4x4 */ + ps_codec->s_cfg.u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_codec->s_cfg.u4_enable_hpel = 0; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 1; + } + } + + /***************************************************************** + * Initialize AIR inside codec + *****************************************************************/ + if(IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode) + { + isvce_init_air_map(ps_codec); + + ps_codec->i4_air_pic_cnt = -1; + } + + /****************************************************/ + /* INITIALIZE RATE CONTROL */ + /****************************************************/ + { + for(i = 0; i < MAX_NUM_SPATIAL_LAYERS; i++) + { + UWORD8 au1_init_qp[MAX_PIC_TYPE]; + UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE]; + UWORD8 au1_min_max_avc_qp[2 * MAX_PIC_TYPE]; + + /* update rc lib with modified qp */ + au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_i_qp[i]]; + au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_p_qp[i]]; + au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_b_qp[i]]; + + au1_min_max_qp[2 * I_PIC] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_i_qp_min[i]]; + au1_min_max_qp[2 * I_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_i_qp_max[i]]; + + au1_min_max_qp[2 * P_PIC] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_p_qp_min[i]]; + au1_min_max_qp[2 * P_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_p_qp_max[i]]; + + au1_min_max_qp[2 * B_PIC] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_b_qp_min[i]]; + au1_min_max_qp[2 * B_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_b_qp_max[i]]; + + /* get rc mode */ + switch(ps_codec->s_cfg.e_rc_mode) + { + case IVE_RC_STORAGE: + ps_codec->s_rate_control.e_rc_type = VBR_STORAGE; + break; + case IVE_RC_CBR_NON_LOW_DELAY: + ps_codec->s_rate_control.e_rc_type = CBR_NLDRC; + break; + case IVE_RC_CBR_LOW_DELAY: + 
ps_codec->s_rate_control.e_rc_type = CBR_LDRC; + break; + case IVE_RC_NONE: + ps_codec->s_rate_control.e_rc_type = CONST_QP; + break; + default: + break; + } + + ps_codec->u1_enable_init_qp = DEFAULT_INIT_QP; + + /* init rate control */ + isvce_rc_init( + ps_codec->s_rate_control.apps_rate_control_api[i], + ps_codec->s_rate_control.pps_frame_time, ps_codec->s_rate_control.pps_time_stamp, + ps_codec->s_rate_control.pps_pd_frm_rate, ps_codec->s_cfg.u4_max_framerate, + ps_codec->s_cfg.u4_src_frame_rate, ps_codec->s_cfg.u4_tgt_frame_rate, + ps_codec->s_rate_control.e_rc_type, ps_codec->s_cfg.au4_target_bitrate[i], + ps_codec->s_cfg.au4_max_bitrate[i], ps_codec->s_cfg.au4_vbv_buffer_delay[i], + ps_codec->s_cfg.u4_i_frm_interval, ps_codec->s_cfg.u4_num_bframes + 1, au1_init_qp, + ps_codec->s_cfg.u4_num_bframes + 2, au1_min_max_qp, + MAX(ps_codec->s_cfg.u4_max_level, + (UWORD32) ih264e_get_min_level(ps_codec->s_cfg.u4_max_wd, + ps_codec->s_cfg.u4_max_ht))); + + au1_min_max_avc_qp[2 * I_PIC] = ps_codec->s_cfg.au4_i_qp_min[i]; + au1_min_max_avc_qp[2 * I_PIC + 1] = ps_codec->s_cfg.au4_i_qp_max[i]; + + au1_min_max_avc_qp[2 * P_PIC] = ps_codec->s_cfg.au4_p_qp_min[i]; + au1_min_max_avc_qp[2 * P_PIC + 1] = ps_codec->s_cfg.au4_p_qp_max[i]; + + au1_min_max_avc_qp[2 * B_PIC] = ps_codec->s_cfg.au4_b_qp_min[i]; + au1_min_max_avc_qp[2 * B_PIC + 1] = ps_codec->s_cfg.au4_b_qp_max[i]; + + irc_change_qp_constraints(ps_codec->s_rate_control.apps_rate_control_api[i], + au1_min_max_qp, au1_min_max_avc_qp); + } + } + + /* recon stride */ + ps_codec->i4_rec_strd = ALIGN16(ps_codec->s_cfg.u4_max_wd) + PAD_WD; + + /* max ref and reorder cnt */ + ps_codec->i4_ref_buf_cnt = ps_codec->s_cfg.u4_max_ref_cnt + ps_codec->s_cfg.u4_max_reorder_cnt; + ps_codec->i4_ref_buf_cnt += MAX_CTXT_SETS; + ps_codec->i4_ref_buf_cnt += ps_codec->s_cfg.s_svc_params.u1_num_temporal_layers; + + DEBUG_HISTOGRAM_INIT(); + + /* Init dependecy vars */ + ps_codec->i4_last_inp_buff_received = 0; + + /* At codec start no IDR is 
pending */ + ps_codec->i4_pending_idr_flag = 0; + + for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers - 1; i++) + { + ps_codec->au4_constrained_intra_pred[i] = 1; + } + + ps_codec->au4_constrained_intra_pred[ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers - 1] = + 0; + + return IH264E_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief update encoder configuration parameters +* +* @par Description: +* updates encoder configuration parameters from the given config set. +* Initialize/reinitialize codec parameters according to new configurations. +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_cfg +* Pointer to config param set +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T isvce_codec_update_config(isvce_codec_t *ps_codec, isvce_cfg_params_t *ps_cfg) +{ + /* config params */ + isvce_cfg_params_t *ps_curr_cfg = &ps_codec->s_cfg; + + /* error status */ + IH264E_ERROR_T err = IH264E_SUCCESS; + + /* temp var */ + UWORD32 u4_init_rc = 0; + + WORD8 i; + + /***********************/ + /* UPDATE CODEC CONFIG */ + /***********************/ + if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_DIMENSIONS) + { + UWORD32 wd_aln = ALIGN16(ps_cfg->u4_wd); + UWORD32 ht_aln = ALIGN16(ps_cfg->u4_ht); + + if(ps_curr_cfg->u4_wd != wd_aln || ps_curr_cfg->u4_ht != ht_aln || + ps_curr_cfg->u4_disp_wd != ps_cfg->u4_disp_wd || + ps_curr_cfg->u4_disp_ht != ps_cfg->u4_disp_ht) + { + ps_curr_cfg->u4_wd = wd_aln; + ps_curr_cfg->u4_ht = ht_aln; + + ps_curr_cfg->u4_disp_wd = ps_cfg->u4_disp_wd; + ps_curr_cfg->u4_disp_ht = ps_cfg->u4_disp_ht; + + ps_curr_cfg->i4_wd_mbs = ps_curr_cfg->u4_wd >> 4; + ps_curr_cfg->i4_ht_mbs = ps_curr_cfg->u4_ht >> 4; + + ps_codec->i4_rec_strd = ALIGN16(ps_cfg->u4_wd) + PAD_WD; + + /* If number of MBs in a frame changes the air map also changes. 
+ * Hence recompute air map also reset air pic cnt */ + if(ps_codec->s_cfg.e_air_mode != IVE_AIR_MODE_NONE) + { + /* re-init the air map */ + isvce_init_air_map(ps_codec); + + /* reset air counter */ + ps_codec->i4_air_pic_cnt = -1; + } + + /* initialize mv bank buffer manager */ + err = isvce_svc_au_data_mgr_add_bufs(ps_codec); + if(err != IH264E_SUCCESS) return err; + + /* initialize ref bank buffer manager */ + err = isvce_svc_au_buf_mgr_add_bufs(ps_codec); + if(err != IH264E_SUCCESS) return err; + + /* since dimension changed, start new sequence by forcing IDR */ + ps_codec->force_curr_frame_type = IV_IDR_FRAME; + + /* in case dimension changes, we need to reinitialize RC as the + * old model shall not fit further */ + u4_init_rc = 1; + + /* when the dimension changes, the header needs to be regenerated */ + ps_codec->i4_gen_header = 1; + } + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_FRAMERATE) + { + /* temp var */ + UWORD32 u4_src_ticks, u4_tgt_ticks; + + u4_src_ticks = ih264e_frame_time_get_src_ticks(ps_codec->s_rate_control.pps_frame_time); + + u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks(ps_codec->s_rate_control.pps_frame_time); + + /* Change frame rate */ + if(ps_codec->s_cfg.u4_src_frame_rate != ps_cfg->u4_src_frame_rate * 1000) + { + ps_codec->s_cfg.u4_src_frame_rate = ps_cfg->u4_src_frame_rate * 1000; + + ih264e_frame_time_update_src_frame_rate(ps_codec->s_rate_control.pps_frame_time, + ps_codec->s_cfg.u4_src_frame_rate); + + ih264_time_stamp_update_frame_rate(ps_codec->s_rate_control.pps_time_stamp, + ps_codec->s_cfg.u4_src_frame_rate); + + for(i = 0; i < ps_cfg->s_svc_params.u1_num_spatial_layers; i++) + { + irc_change_frame_rate(ps_codec->s_rate_control.apps_rate_control_api[i], + ps_codec->s_cfg.u4_src_frame_rate, u4_src_ticks, + u4_tgt_ticks); + } + } + + if(ps_codec->s_cfg.u4_tgt_frame_rate != ps_cfg->u4_tgt_frame_rate * 1000) + { + ps_codec->s_cfg.u4_tgt_frame_rate = ps_cfg->u4_tgt_frame_rate * 1000; + + 
ih264e_frame_time_update_tgt_frame_rate(ps_codec->s_rate_control.pps_frame_time, + ps_codec->s_cfg.u4_tgt_frame_rate); + + for(i = 0; i < ps_cfg->s_svc_params.u1_num_spatial_layers; i++) + { + irc_change_frame_rate(ps_codec->s_rate_control.apps_rate_control_api[i], + ps_codec->s_cfg.u4_src_frame_rate, u4_src_ticks, + u4_tgt_ticks); + + irc_change_frm_rate_for_bit_alloc(ps_codec->s_rate_control.apps_rate_control_api[i], + ps_codec->s_cfg.u4_tgt_frame_rate); + } + } + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_BITRATE) + { + for(i = 0; i < MAX_NUM_SPATIAL_LAYERS; i++) + { + if(ps_curr_cfg->au4_target_bitrate[i] != ps_cfg->au4_target_bitrate[i]) + { + if(IVE_RC_NONE != ps_curr_cfg->e_rc_mode) + irc_change_avg_bit_rate(ps_codec->s_rate_control.apps_rate_control_api[i], + ps_cfg->au4_target_bitrate[i]); + + ps_curr_cfg->au4_target_bitrate[i] = ps_cfg->au4_target_bitrate[i]; + } + } + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_FRAMETYPE) + { + switch(ps_cfg->e_frame_type) + { + case IV_I_FRAME: + ps_codec->force_curr_frame_type = IV_I_FRAME; + break; + + case IV_IDR_FRAME: + ps_codec->force_curr_frame_type = IV_IDR_FRAME; + break; + + case IV_P_FRAME: + default: + break; + } + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_ME_PARAMS) + { + if(ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG) + { + ps_codec->s_cfg.u4_enable_hpel = ps_cfg->u4_enable_hpel; + ps_codec->s_cfg.u4_enable_fast_sad = ps_cfg->u4_enable_fast_sad; + ps_codec->s_cfg.u4_me_speed_preset = ps_cfg->u4_me_speed_preset; + ps_codec->s_cfg.u4_enable_qpel = ps_cfg->u4_enable_qpel; + } + else if(ps_curr_cfg->u4_enc_speed_preset == IVE_FASTEST) + { + ps_codec->s_cfg.u4_enable_fast_sad = ps_cfg->u4_enable_fast_sad; + } + ps_codec->s_cfg.u4_srch_rng_x = ps_cfg->u4_srch_rng_x; + ps_codec->s_cfg.u4_srch_rng_y = ps_cfg->u4_srch_rng_y; + + if(ps_codec->s_cfg.u4_enable_alt_ref != ps_cfg->u4_enable_alt_ref) + { + ps_codec->s_cfg.u4_enable_alt_ref = ps_cfg->u4_enable_alt_ref; + ps_codec->u4_is_curr_frm_ref = 1; + 
} + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_IPE_PARAMS) + { + ps_curr_cfg->u4_enc_speed_preset = ps_cfg->u4_enc_speed_preset; + + if(ps_curr_cfg->u4_enc_speed_preset == IVE_SLOWEST) + { + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns; + + /* enable diamond search */ + ps_curr_cfg->u4_me_speed_preset = DMND_SRCH; + ps_curr_cfg->u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_curr_cfg->u4_enable_intra_4x4 = 1; + ps_enc_loop_fxns->apf_luma_energy_compaction[1] = + isvce_code_luma_intra_macroblock_4x4_rdopt_on; + + /* sub pel off */ + ps_curr_cfg->u4_enable_hpel = 1; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + } + else if(ps_curr_cfg->u4_enc_speed_preset == IVE_NORMAL) + { /* normal */ + /* enable diamond search */ + ps_curr_cfg->u4_me_speed_preset = DMND_SRCH; + ps_curr_cfg->u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_curr_cfg->u4_enable_intra_4x4 = 1; + + /* sub pel off */ + ps_curr_cfg->u4_enable_hpel = 1; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + } + else if(ps_curr_cfg->u4_enc_speed_preset == IVE_FAST) + { /* normal */ + /* enable diamond search */ + ps_curr_cfg->u4_me_speed_preset = DMND_SRCH; + ps_curr_cfg->u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_curr_cfg->u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_curr_cfg->u4_enable_hpel = 1; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 1; + } + else if(ps_curr_cfg->u4_enc_speed_preset == IVE_HIGH_SPEED) + { /* fast */ + /* enable diamond search */ + ps_curr_cfg->u4_me_speed_preset = DMND_SRCH; + ps_curr_cfg->u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_curr_cfg->u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_curr_cfg->u4_enable_hpel = 0; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + 
} + else if(ps_curr_cfg->u4_enc_speed_preset == IVE_FASTEST) + { /* fastest */ + /* enable diamond search */ + ps_curr_cfg->u4_me_speed_preset = DMND_SRCH; + // u4_num_layers = 4; + + /* disable intra 4x4 */ + ps_curr_cfg->u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_curr_cfg->u4_enable_hpel = 0; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 1; + } + else if(ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG) + { + ps_curr_cfg->u4_enable_intra_4x4 = ps_cfg->u4_enable_intra_4x4; + } + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_GOP_PARAMS) + { + if(ps_curr_cfg->u4_i_frm_interval != ps_cfg->u4_i_frm_interval) + { + ps_curr_cfg->u4_i_frm_interval = ps_cfg->u4_i_frm_interval; + + /* reset air counter */ + ps_codec->i4_air_pic_cnt = -1; + + /* re-init air map */ + isvce_init_air_map(ps_codec); + + /*Effect intra frame interval change*/ + for(i = 0; i < ps_cfg->s_svc_params.u1_num_spatial_layers; i++) + { + irc_change_intra_frm_int_call(ps_codec->s_rate_control.apps_rate_control_api[i], + ps_curr_cfg->u4_i_frm_interval); + } + } + + ps_curr_cfg->u4_idr_frm_interval = ps_cfg->u4_idr_frm_interval; + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_DEBLOCK_PARAMS) + { + ps_curr_cfg->u4_disable_deblock_level = ps_cfg->u4_disable_deblock_level; + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_QP) + { + for(i = 0; i < ps_cfg->s_svc_params.u1_num_spatial_layers; i++) + { + UWORD8 au1_init_qp[MAX_PIC_TYPE]; + UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE]; + UWORD8 au1_min_max_avc_qp[2 * MAX_PIC_TYPE]; + + ps_codec->s_cfg.au4_i_qp_max[i] = ps_cfg->au4_i_qp_max[i]; + ps_codec->s_cfg.au4_i_qp_min[i] = ps_cfg->au4_i_qp_min[i]; + ps_codec->s_cfg.au4_i_qp[i] = ps_cfg->au4_i_qp[i]; + + ps_codec->s_cfg.au4_p_qp_max[i] = ps_cfg->au4_p_qp_max[i]; + ps_codec->s_cfg.au4_p_qp_min[i] = ps_cfg->au4_p_qp_min[i]; + ps_codec->s_cfg.au4_p_qp[i] = ps_cfg->au4_p_qp[i]; + + ps_codec->s_cfg.au4_b_qp_max[i] = ps_cfg->au4_b_qp_max[i]; + ps_codec->s_cfg.au4_b_qp_min[i] = 
ps_cfg->au4_b_qp_min[i]; + ps_codec->s_cfg.au4_b_qp[i] = ps_cfg->au4_b_qp[i]; + + /* update rc lib with modified qp */ + au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_i_qp[i]]; + au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_p_qp[i]]; + au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_b_qp[i]]; + + irc_change_init_qp(ps_codec->s_rate_control.apps_rate_control_api[i], au1_init_qp); + + au1_min_max_qp[2 * I_PIC] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_i_qp_min[i]]; + au1_min_max_qp[2 * I_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_i_qp_max[i]]; + + au1_min_max_qp[2 * P_PIC] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_p_qp_min[i]]; + au1_min_max_qp[2 * P_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_p_qp_max[i]]; + + au1_min_max_qp[2 * B_PIC] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_b_qp_min[i]]; + au1_min_max_qp[2 * B_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_b_qp_max[i]]; + + au1_min_max_avc_qp[2 * I_PIC] = ps_codec->s_cfg.au4_i_qp_min[i]; + au1_min_max_avc_qp[2 * I_PIC + 1] = ps_codec->s_cfg.au4_i_qp_max[i]; + + au1_min_max_avc_qp[2 * P_PIC] = ps_codec->s_cfg.au4_p_qp_min[i]; + au1_min_max_avc_qp[2 * P_PIC + 1] = ps_codec->s_cfg.au4_p_qp_max[i]; + + au1_min_max_avc_qp[2 * B_PIC] = ps_codec->s_cfg.au4_b_qp_min[i]; + au1_min_max_avc_qp[2 * B_PIC + 1] = ps_codec->s_cfg.au4_b_qp_max[i]; + + irc_change_qp_constraints(ps_codec->s_rate_control.apps_rate_control_api[i], + au1_min_max_qp, au1_min_max_avc_qp); + } + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_ENC_MODE) + { + ps_codec->s_cfg.e_enc_mode = ps_cfg->e_enc_mode; + + if(ps_codec->s_cfg.e_enc_mode == IVE_ENC_MODE_HEADER) + { + ps_codec->i4_header_mode = 1; + ps_codec->s_cfg.e_enc_mode = IVE_ENC_MODE_PICTURE; + } + else + { + ps_codec->i4_header_mode = 0; + } + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_VBV_PARAMS && + IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode) + { + for(i = 0; i < 
ps_cfg->s_svc_params.u1_num_spatial_layers; i++) + { + ps_codec->s_cfg.au4_vbv_buffer_delay[i] = ps_cfg->au4_vbv_buffer_delay[i]; + } + // irc_change_buffer_delay(ps_codec->s_rate_control.pps_rate_control_api, + // ps_codec->s_cfg.u4_vbv_buffer_delay); + + // TODO: remove this when the support for changing buffer dynamically + // is yet to be added. + u4_init_rc = 1; + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_AIR_PARAMS) + { + if(ps_curr_cfg->e_air_mode != ps_cfg->e_air_mode || + ps_curr_cfg->u4_air_refresh_period != ps_cfg->u4_air_refresh_period) + { + ps_curr_cfg->e_air_mode = ps_cfg->e_air_mode; + ps_curr_cfg->u4_air_refresh_period = ps_cfg->u4_air_refresh_period; + + isvce_init_air_map(ps_codec); + + /* reset air counter */ + ps_codec->i4_air_pic_cnt = -1; + } + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_PROFILE_PARAMS) + { + ps_codec->s_cfg.e_profile = ps_cfg->e_profile; + ps_codec->s_cfg.u4_entropy_coding_mode = ps_cfg->u4_entropy_coding_mode; + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_NUM_CORES) + { + ps_codec->s_cfg.u4_num_cores = ps_cfg->u4_num_cores; + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_VUI_PARAMS) + { + ps_codec->s_cfg.s_vui = ps_cfg->s_vui; + } + + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_SEI_MDCV_PARAMS) + { + ps_codec->s_cfg.s_sei.u1_sei_mdcv_params_present_flag = + ps_cfg->s_sei.u1_sei_mdcv_params_present_flag; + ps_codec->s_cfg.s_sei.s_sei_mdcv_params = ps_cfg->s_sei.s_sei_mdcv_params; + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_SEI_CLL_PARAMS) + { + ps_codec->s_cfg.s_sei.u1_sei_cll_params_present_flag = + ps_cfg->s_sei.u1_sei_cll_params_present_flag; + ps_codec->s_cfg.s_sei.s_sei_cll_params = ps_cfg->s_sei.s_sei_cll_params; + } + else if(ps_cfg->e_cmd == ISVCE_CMD_CTL_SET_SEI_AVE_PARAMS) + { + ps_codec->s_cfg.s_sei.u1_sei_ave_params_present_flag = + ps_cfg->s_sei.u1_sei_ave_params_present_flag; + ps_codec->s_cfg.s_sei.s_sei_ave_params = ps_cfg->s_sei.s_sei_ave_params; + } + else if(ps_cfg->e_cmd == 
ISVCE_CMD_CTL_SET_SEI_CCV_PARAMS) + { + ps_codec->s_cfg.s_sei.u1_sei_ccv_params_present_flag = + ps_cfg->s_sei.u1_sei_ccv_params_present_flag; + ps_codec->s_cfg.s_sei.s_sei_ccv_params = ps_cfg->s_sei.s_sei_ccv_params; + } + + /* reset RC model */ + if(u4_init_rc) + { + for(i = 0; i < ps_cfg->s_svc_params.u1_num_spatial_layers; i++) + { + /* init qp */ + UWORD8 au1_init_qp[MAX_PIC_TYPE]; + + /* min max qp */ + UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE]; + + /* init i,p,b qp */ + au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_i_qp[i]]; + au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_p_qp[i]]; + au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_b_qp[i]]; + + /* init min max qp */ + au1_min_max_qp[2 * I_PIC] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_i_qp_min[i]]; + au1_min_max_qp[2 * I_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_i_qp_max[i]]; + + au1_min_max_qp[2 * P_PIC] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_p_qp_min[i]]; + au1_min_max_qp[2 * P_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_p_qp_max[i]]; + + au1_min_max_qp[2 * B_PIC] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_b_qp_min[i]]; + au1_min_max_qp[2 * B_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.au4_b_qp_max[i]]; + + /* get rc mode */ + switch(ps_codec->s_cfg.e_rc_mode) + { + case IVE_RC_STORAGE: + ps_codec->s_rate_control.e_rc_type = VBR_STORAGE; + break; + + case IVE_RC_CBR_NON_LOW_DELAY: + ps_codec->s_rate_control.e_rc_type = CBR_NLDRC; + break; + + case IVE_RC_CBR_LOW_DELAY: + ps_codec->s_rate_control.e_rc_type = CBR_LDRC; + break; + + case IVE_RC_NONE: + ps_codec->s_rate_control.e_rc_type = CONST_QP; + break; + + default: + break; + } + + /* init rate control */ + for(i = 0; i < MAX_NUM_SPATIAL_LAYERS; i++) + { + isvce_rc_init( + ps_codec->s_rate_control.apps_rate_control_api[i], + ps_codec->s_rate_control.pps_frame_time, + ps_codec->s_rate_control.pps_time_stamp, + ps_codec->s_rate_control.pps_pd_frm_rate, 
ps_codec->s_cfg.u4_max_framerate, + ps_codec->s_cfg.u4_src_frame_rate, ps_codec->s_cfg.u4_tgt_frame_rate, + ps_codec->s_rate_control.e_rc_type, ps_codec->s_cfg.au4_target_bitrate[i], + ps_codec->s_cfg.au4_max_bitrate[i], ps_codec->s_cfg.au4_vbv_buffer_delay[i], + ps_codec->s_cfg.u4_i_frm_interval, ps_codec->s_cfg.u4_num_bframes + 1, + au1_init_qp, ps_codec->s_cfg.u4_num_bframes + 2, au1_min_max_qp, + ps_codec->s_cfg.u4_max_level); + } + } + } + + return err; +} + +static FORCEINLINE void isvce_change_rc_init_qp(void *pv_rate_control_api, UWORD8 u1_qp) +{ + UWORD8 au1_pic_qps[MAX_PIC_TYPE]; + WORD32 i; + + for(i = 0; i < MAX_PIC_TYPE; i++) + { + au1_pic_qps[i] = gau1_h264_to_mpeg2_qmap[CLIP3(MIN_H264_QP, MAX_H264_QP, u1_qp + i)]; + } + + irc_change_init_qp(pv_rate_control_api, au1_pic_qps); +} + +/** + ******************************************************************************* + * + * @brief + * Queues the current buffer, gets back a another buffer for encoding with + *corrent picture type + * + * @par Description: + * This function performs 3 distinct but related functions. + * 1) Maintains an input queue [Note the the term queue donot imply a + * first-in first-out logic here] that queues input and dequeues them so + * that input frames can be encoded at any predetermined encoding order + * 2) Uses RC library to decide which frame must be encoded in current pass + * and which picture type it must be encoded to. + * 3) Uses RC library to decide the QP at which current frame has to be + * encoded + * 4) Determines if the current picture must be encoded or not based on + * PRE-ENC skip + * + * Input queue is used for storing input buffers till they are used for + * encoding. This queue is maintained at ps_codec->as_inp_list. Whenever a + * valid input comes, it is added to the end of queue. This same input is + * added to RC queue using the identifier as ps_codec->i4_pic_cnt. Hence any + * pic from RC can be located in the input queue easily. 
+ *
+ *  The dequeue operation does not start till we have
+ *  ps_codec->s_cfg.u4_num_bframes frames in the queue. This is done in order
+ *  to ensure that once output starts we will have a constant stream of output
+ *  with no gaps.
+ *
+ *  The output frame order is governed by the RC library. Whenever we dequeue a
+ *  buffer from the RC library, it ensures that we will get them in encoding
+ *  order. With the output of the RC library, we can use the picture id to
+ *  dequeue the corresponding buffer from the input queue and encode it.
+ *
+ *  Condition at the end of stream.
+ *  -------------------------------
+ *  At the last valid buffer from the app, we will get ps_ive_ip->u4_is_last
+ *  to be set. This will be given to the lib when the appropriate input buffer
+ *  is given to encoding.
+ *
+ *  Since the output is not in sync with the input, we will have frames
+ *  to encode even after we receive the last valid input buffer. Hence we have
+ *  to make sure that we do not queue any new buffers once we get the flag [It
+ *  may mess up GOP ?]. This is achieved by setting
+ *  ps_codec->i4_last_inp_buff_received to act as a permanent marker for the
+ *  last frame received [This may not be needed, because in our current app, all
+ *  buffers after the last are marked as last. But can we rely on that?]. Hence
+ *  after this flag is set no new buffers are queued.
+ *
+ * @param[in] ps_codec
+ *  Pointer to codec descriptor
+ *
+ * @param[in] ps_ive_ip
+ *  Current input buffer to the encoder
+ *
+ * @param[out] ps_enc_buff
+ *  Buffer to be encoded in the current pass
+ *
+ * @param[in] i1_layer_id
+ *  Index of the spatial layer; selects the rate control instance used
+ *
+ * @returns
+ *  Flag indicating if we have a pre-enc skip or not
+ *
+ * @remarks
+ *  TODO (bpic)
+ *  The check for null and is last is redundant.
+ * Need to see if we can remove it
+ *
+ *******************************************************************************
+ */
+WORD32 isvce_input_queue_update(isvce_codec_t *ps_codec, ive_video_encode_ip_t *ps_ive_ip,
+                                isvce_inp_buf_t *ps_enc_buff, WORD8 i1_layer_id)
+{
+    isvce_inp_buf_t *ps_inp_buf;
+    picture_type_e e_pictype;
+    WORD32 i4_skip;
+    UWORD32 ctxt_sel, u4_pic_id, u4_pic_disp_id;
+    UWORD8 u1_frame_qp = MAX_H264_QP;
+    UWORD32 max_frame_bits = 0x7FFFFFFF;
+
+    WORD32 i;
+
+    /* Mark that the last input frame has been received. The marker is only
+     * ever set in this function, never cleared. */
+    if(ps_ive_ip->u4_is_last == 1)
+    {
+        ps_codec->i4_last_inp_buff_received = 1;
+    }
+
+    if(ps_ive_ip->s_inp_buf.apv_bufs[0] == NULL && !ps_codec->i4_last_inp_buff_received)
+    {
+        ps_enc_buff->s_inp_props.s_raw_buf.apv_bufs[0] = NULL;
+        ps_enc_buff->s_inp_props.u4_is_last = ps_ive_ip->u4_is_last;
+        return 0;
+    }
+
+    /***************************************************************************
+     * Check for pre enc skip
+     *   When src and target frame rates do not match, we skip some frames to
+     *   maintain the relationship between them.
+     *   On a skip this function returns 1 before the input is queued or added
+     *   to the RC stack; only u4_is_last is propagated to ps_enc_buff.
+     **************************************************************************/
+    {
+        WORD32 skip_src;
+
+        skip_src = isvce_update_rc_framerates(
+            ps_codec->s_rate_control.apps_rate_control_api[i1_layer_id],
+            ps_codec->s_rate_control.pps_pd_frm_rate, ps_codec->s_rate_control.pps_time_stamp,
+            ps_codec->s_rate_control.pps_frame_time);
+
+        if(skip_src)
+        {
+            ps_enc_buff->s_inp_props.u4_is_last = ps_ive_ip->u4_is_last;
+            return 1;
+        }
+    }
+
+    /***************************************************************************
+     * Queue the input to the queue
+     *   The slot is as_inp_list[i4_pic_cnt % SVC_MAX_NUM_INP_FRAMES].
+     **************************************************************************/
+    ps_inp_buf = &(ps_codec->as_inp_list[ps_codec->i4_pic_cnt % SVC_MAX_NUM_INP_FRAMES]);
+
+    /* copy input info to internal structure */
+    ps_inp_buf->s_inp_props.s_raw_buf = ps_ive_ip->s_inp_buf;
+    ps_inp_buf->s_inp_props.u4_timestamp_low = ps_ive_ip->u4_timestamp_low;
+    ps_inp_buf->s_inp_props.u4_timestamp_high = ps_ive_ip->u4_timestamp_high;
+    ps_inp_buf->s_inp_props.u4_is_last = ps_ive_ip->u4_is_last;
+    ps_inp_buf->s_inp_props.pv_mb_info = ps_ive_ip->pv_mb_info;
+    ps_inp_buf->s_inp_props.u4_mb_info_type = ps_ive_ip->u4_mb_info_type;
+    ps_inp_buf->s_inp_props.pv_pic_info = ps_ive_ip->pv_pic_info;
+    ps_inp_buf->s_inp_props.u4_pic_info_type = ps_ive_ip->u4_pic_info_type;
+
+    ps_inp_buf->s_inp_props.u1_sei_ccv_params_present_flag =
+        ps_codec->s_cfg.s_sei.u1_sei_ccv_params_present_flag;
+    ps_inp_buf->s_inp_props.s_sei_ccv = ps_codec->s_cfg.s_sei.s_sei_ccv_params;
+
+    if(ps_inp_buf->s_inp_props.s_raw_buf.apv_bufs[0])
+        isvce_svc_inp_buf_populate(ps_codec, ps_inp_buf);
+
+    /***************************************************************************
+     * Now we should add the picture to RC stack here
+     **************************************************************************/
+    /*
+     * If an I frame has been requested, ask RC to force it.
+     * For IDR requests, we have to ask RC to force I and set IDR ourselves,
+     * since RC does not know about IDR. For forcing an IDR at the dequeue stage
+     * we have to record that an IDR has been requested; this is kept in
+     * ps_codec->i4_pending_idr_flag and consumed when the next I picture is
+     * dequeued.
+     * The forced-frame request is cleared only when called for the last spatial
+     * layer (u1_num_spatial_layers - 1).
+     * NOTE(review): u4_idr_frm_interval is used as a modulus below, so this
+     * assumes it is non-zero - confirm it is validated at configuration time.
+     */
+    {
+        WORD32 i4_force_idr, i4_force_i;
+
+        i4_force_idr = (ps_codec->force_curr_frame_type == IV_IDR_FRAME);
+        i4_force_idr |= !(ps_codec->i4_pic_cnt % ps_codec->s_cfg.u4_idr_frm_interval);
+
+        i4_force_i = (ps_codec->force_curr_frame_type == IV_I_FRAME);
+
+        ps_codec->i4_pending_idr_flag |= i4_force_idr;
+
+        if((ps_codec->i4_pic_cnt > 0) && (i4_force_idr || i4_force_i))
+        {
+            irc_force_I_frame(ps_codec->s_rate_control.apps_rate_control_api[i1_layer_id]);
+        }
+
+        if(i1_layer_id == (ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers - 1))
+        {
+            ps_codec->force_curr_frame_type = IV_NA_FRAME;
+        }
+    }
+
+    irc_add_picture_to_stack(ps_codec->s_rate_control.apps_rate_control_api[i1_layer_id],
+                             ps_codec->i4_pic_cnt);
+
+    /* Delay: while i4_encode_api_call_cnt is below u4_num_bframes nothing is
+     * dequeued; the caller gets a NULL buffer with u4_is_last cleared. */
+    if(ps_codec->i4_encode_api_call_cnt < (WORD32) (ps_codec->s_cfg.u4_num_bframes))
+    {
+        ps_enc_buff->s_inp_props.s_raw_buf.apv_bufs[0] = NULL;
+        ps_enc_buff->s_inp_props.u4_is_last = 0;
+        return 0;
+    }
+
+    /***************************************************************************
+     * Get a new pic to encode
+     **************************************************************************/
+    /* Query the picture_type; the picture id and display id are returned
+     * through u4_pic_id and u4_pic_disp_id. Any type other than I/P/B makes the
+     * function return 0 with a NULL output buffer. */
+    e_pictype =
+        isvce_rc_get_picture_details(ps_codec->s_rate_control.apps_rate_control_api[i1_layer_id],
+                                     (WORD32 *) (&u4_pic_id), (WORD32 *) (&u4_pic_disp_id));
+
+    switch(e_pictype)
+    {
+        case I_PIC:
+            ps_codec->pic_type = PIC_I;
+            break;
+        case P_PIC:
+            ps_codec->pic_type = PIC_P;
+            break;
+        case B_PIC:
+            ps_codec->pic_type = PIC_B;
+            break;
+        default:
+            ps_codec->pic_type = PIC_NA;
+            ps_enc_buff->s_inp_props.s_raw_buf.apv_bufs[0] = NULL;
+            return 0;
+    }
+
+    /* Set IDR if it has been requested; the pending flag is consumed by the
+     * first I picture that is dequeued. */
+    if(ps_codec->pic_type == PIC_I)
+    {
+        ps_codec->pic_type = ps_codec->i4_pending_idr_flag ? PIC_IDR : ps_codec->pic_type;
+        ps_codec->i4_pending_idr_flag = 0;
+    }
+
+    if(ps_codec->s_rate_control.e_rc_type != CONST_QP && ps_codec->u1_enable_init_qp &&
+       (u4_pic_id == 0 ||
+        irc_is_scenecut(ps_codec->s_rate_control.apps_rate_control_api[i1_layer_id])))
+    {
+        DOUBLE d_bpp;
+
+        svc_rc_utils_ctxt_t *ps_svc_rc_utils = &ps_codec->s_rate_control.s_rc_utils;
+
+        UWORD32 u4_src_fps = ps_codec->s_cfg.u4_src_frame_rate / 1000;
+        UWORD32 u4_wd = ps_inp_buf->as_layer_yuv_buf_props[i1_layer_id].u4_width;
+        UWORD32 u4_ht = ps_inp_buf->as_layer_yuv_buf_props[i1_layer_id].u4_height;
+        DOUBLE d_gpp =
+            isvce_compute_gpp(ps_svc_rc_utils, &ps_inp_buf->as_layer_yuv_buf_props[i1_layer_id]);
+
+        d_bpp = ((DOUBLE) irc_get_vbv_buf_size(
+                     ps_codec->s_rate_control.apps_rate_control_api[i1_layer_id]) /
+                 10.) /
+                ((DOUBLE) (u4_src_fps * u4_wd * u4_ht));
+
+        u1_frame_qp = (UWORD8) irc_get_frame_level_init_qp(
+            ps_codec->s_rate_control.apps_rate_control_api[i1_layer_id],
+            ps_codec->s_rate_control.e_rc_type, e_pictype, d_bpp, d_gpp);
+
+        isvce_change_rc_init_qp(ps_codec->s_rate_control.apps_rate_control_api[i1_layer_id],
+                                u1_frame_qp);
+
+        ps_codec->au4_frame_qp[i1_layer_id] = u1_frame_qp;
+    }
+    else
+    {
+        /* Get current frame Qp. The value returned by RC is mapped through
+         * gau1_mpeg2_to_h264_qmap before it is stored, whereas the branch above
+         * (RC not CONST_QP, u1_enable_init_qp set, and first picture or scene
+         * cut) stores the value from irc_get_frame_level_init_qp() unmapped.
+         * NOTE(review): in the branch above ps_inp_buf still refers to the
+         * slot queued in this call (index i4_pic_cnt), not to the slot of
+         * u4_pic_id, because ps_inp_buf is re-pointed only further below -
+         * confirm that width/height/gpp are meant to come from that slot.
+         * NOTE(review): u4_src_fps is an integer division by 1000, so it is 0
+         * when u4_src_frame_rate < 1000 and the d_bpp divisor becomes 0. */
+        u1_frame_qp = (UWORD8) irc_get_frame_level_qp(
+            ps_codec->s_rate_control.apps_rate_control_api[i1_layer_id], e_pictype, max_frame_bits);
+        ps_codec->au4_frame_qp[i1_layer_id] = gau1_mpeg2_to_h264_qmap[u1_frame_qp];
+    }
+
+    /*
+     * copy the pic id to poc because the display order is assumed to be same
+     * as input order
+     */
+    ps_codec->i4_poc = u4_pic_id;
+
+    /***************************************************************************
+     * Now retrieve the correct picture from the queue
+     **************************************************************************/
+
+    /* Mark the skip flag: always 0 here, stored for the context set selected
+     * by i4_encode_api_call_cnt % MAX_CTXT_SETS */
+    i4_skip = 0;
+    ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
+    ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = i4_skip;
+
+    /* Get a buffer to encode: the slot is selected by the picture id that RC
+     * returned, modulo SVC_MAX_NUM_INP_FRAMES */
+    ps_inp_buf = &(ps_codec->as_inp_list[u4_pic_id % SVC_MAX_NUM_INP_FRAMES]);
+
+    /* copy dequeued input to output (struct copy of the whole entry) */
+    ps_enc_buff[0] = ps_inp_buf[0];
+
+    /* Special case for encoding trailing B frames
+     *
+     * In encoding streams with B frames it may happen that we have a B frame
+     * at the end without a P/I frame after it. Hence when we are dequeuing from
+     * the RC, it will return the P frame [next in display order but before in
+     * encoding order] first. Since the dequeue happens for an invalid frame we
+     * will get a frame with null buff and set u4_is_last. Hence the lib will
+     * return the last frame flag at this point and will stop encoding.
+     *
+     * Since for the last B frame we do not have the forward ref frame,
+     * it makes sense to force it into P.
+     *
+     * To solve this, in case the current frame is P and if the last frame flag
+     * is set, we need to see if there are any pending B frames. If there are
+     * any, we should just encode that picture as the current P frame and set
+     * that B frame as the last frame. Hence the encoder will terminate
+     * naturally once that B-frame is encoded after all the in between frames.
+     *
+     * Since we cannot touch RC stack directly, the option of actually swapping
+     * frames in RC is ruled out. We have to modify the as_inp_list to simulate
+     * such a behavior by RC. We can do that by
+     *  1) Search through as_inp_list for the entry with the highest
+     *  (u4_timestamp_high, u4_timestamp_low) among the entries whose buffer
+     *  pointer is still non-NULL. This is taken to be the last B frame
+     *  before the current P frame. Note that this will handle pre encode skip
+     *  too since queue happens after pre enc skip.
+     *  2) Swap the position in as_inp_list. Hence now the last B frame is
+     *  encoded as P frame. And the new last B frame will have u4_is_last set so
+     *  that encoder will end naturally once we reached that B frame or any
+     *  subsequent frame. Also the current GOP will have 1 less B frame. Since
+     *  we are swapping, the poc will also be in-order.
+     *  3) In case we have an IPP stream, the result of our search will
+     *  be an I/P frame which is already encoded. Thus swap and encode will
+     *  result in encoding of duplicate frames. Hence to avoid this we will only
+     *  have this work around in case of u4_num_bframes > 0.
+     *
+     *  In case we have forced an I/IDR frame in between this P frame and
+     *  the last B frame -> This cannot happen as the current P frame is
+     *  supposed to have u4_is_last set. Thus forcing an I/IDR after this
+     *  is illogical.
+     *
+     *  In case we have forced an I such that the frame just before the last
+     *  frame is I/P -> This case will never arise. Since we have a closed GOP
+     *  now, once we force an I, the gop gets reset, hence there will be a B
+     *  between I/P and I/P.
+     */
+    if(ps_enc_buff->s_inp_props.u4_is_last && (ps_codec->pic_type == PIC_P) &&
+       ps_codec->s_cfg.u4_num_bframes)
+    {
+        WORD32 cntr;
+        WORD32 lst_bframe = -1;
+        UWORD32 u4_timestamp_low = 0;
+        UWORD32 u4_timestamp_high = 0;
+        isvce_inp_buf_t *ps_swap_buff, *ps_inp_list;
+
+        ps_inp_list = &ps_codec->as_inp_list[0];
+
+        /* Now search the inp list for highest timestamp. Only entries whose
+         * first buffer pointer is non-NULL are considered, and (high, low) is
+         * compared as a pair. The comparison is strict against an initial
+         * (0, 0), so an entry stamped (0, 0) is never selected. */
+        for(cntr = 0; cntr < SVC_MAX_NUM_INP_FRAMES; cntr++)
+        {
+            if(ps_inp_list[cntr].s_inp_props.s_raw_buf.apv_bufs[0] != NULL)
+            {
+                if((ps_inp_list[cntr].s_inp_props.u4_timestamp_high > u4_timestamp_high) ||
+                   (ps_inp_list[cntr].s_inp_props.u4_timestamp_high == u4_timestamp_high &&
+                    ps_inp_list[cntr].s_inp_props.u4_timestamp_low > u4_timestamp_low))
+                {
+                    u4_timestamp_low = ps_inp_list[cntr].s_inp_props.u4_timestamp_low;
+                    u4_timestamp_high = ps_inp_list[cntr].s_inp_props.u4_timestamp_high;
+                    lst_bframe = cntr;
+                }
+            }
+        }
+
+        if(lst_bframe != -1)
+        {
+            ps_swap_buff = &(ps_codec->as_inp_list[lst_bframe]);
+
+            /* copy the last B buffer to output */
+            *ps_enc_buff = *ps_swap_buff;
+
+            /* Store the current buf into the queue in place of last B buf */
+            *ps_swap_buff = *ps_inp_buf;
+        }
+    }
+
+    if(ps_enc_buff->s_inp_props.u4_is_last)
+    {
+        ps_codec->pic_type =
PIC_NA; + } + + /* The buffer in the queue is set to NULL to specify that encoding is done for + * that frame */ + for(i = 0; i < 3; i++) + { + ps_inp_buf->s_inp_props.s_raw_buf.apv_bufs[i] = NULL; + } + + /* Return the buffer status */ + return (0); +} + +/** +****************************************************************************** +* +* @brief +* This function joins all the spawned threads after successful completion of +* their tasks +* +* @par Description +* +* @param[in] ps_codec +* pointer to codec context +* +* @returns none +* +****************************************************************************** +*/ +void isvce_join_threads(isvce_codec_t *ps_codec) +{ + WORD32 i = 0; + WORD32 ret = 0; + + /* join spawned threads */ + while(i < ps_codec->i4_proc_thread_cnt) + { + if(ps_codec->ai4_process_thread_created[i]) + { + ret = ithread_join(ps_codec->apv_proc_thread_handle[i], NULL); + + if(ret != 0) + { + ASSERT(0); + } + + ps_codec->ai4_process_thread_created[i] = 0; + i++; + } + } + + ps_codec->i4_proc_thread_cnt = 0; +} diff --git a/encoder/svc/isvce_utils.h b/encoder/svc/isvce_utils.h new file mode 100644 index 0000000..fc6a7a5 --- /dev/null +++ b/encoder/svc/isvce_utils.h @@ -0,0 +1,234 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_utils.h +* +* @brief +* Contains function declarations for function declared in ih264e_svc_utils.c +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _ISVCE_UTILS_H_ +#define _ISVCE_UTILS_H_ + +#include +#include +#include +#include +#include + +#include "ih264_typedefs.h" +#include "ih264e_bitstream.h" +#include "isvc_macros.h" +#include "isvc_structs.h" +#include "isvce_defs.h" +#include "isvce_globals.h" +#include "isvce_interface_structs.h" +#include "isvce_structs.h" + +static FORCEINLINE void isvce_svc_au_buf_init(svc_au_buf_t *ps_svc_pic_buf, + svc_params_t *ps_svc_params) +{ + ps_svc_pic_buf->i1_temporal_id = -1; + ps_svc_pic_buf->u1_num_spatial_layers = ps_svc_params->u1_num_spatial_layers; + ps_svc_pic_buf->d_spatial_res_ratio = ps_svc_params->d_spatial_res_ratio; +} + +static FORCEINLINE WORD8 isvce_svc_temporal_id_compute(WORD32 i4_poc, UWORD8 u1_num_temporal_layers, + PIC_TYPE_T e_pic_type) +{ + if(e_pic_type == PIC_IDR) + { + return 0; + } + else + { + return i4_poc % u1_num_temporal_layers; + } +} + +static FORCEINLINE WORD32 isvcd_get_ceil_log2(WORD32 i4_input) +{ + WORD32 i4_bits = 0; + + /* check for negative number */ + ASSERT(i4_input >= 0); + + i4_input--; + + while(i4_input > 0) + { + i4_bits++; + i4_input >>= 1; + } + + return (i4_bits); +} +/** +******************************************************************************* +* +* @brief calculate coded subblock pattern from nnz +* +* @par Description: +* calculate coded subblock pattern from nnz +* +* @param[in] ps_proc +* process context +* +* @returns csbp +* +* @remarks none +* 
+*******************************************************************************
+*/
+static FORCEINLINE UWORD32 isvce_calculate_csbp(isvce_process_ctxt_t *ps_proc)
+{
+    /* the first byte of au4_nnz is skipped; the 16 bytes after it are read as
+     * the per-block nnz values, indexed in raster order */
+    UWORD8 *pu1_nnz = ((UWORD8 *) ps_proc->au4_nnz) + 1;
+    UWORD32 u4_csbp = 0;
+    WORD32 i4_blk;
+
+    for(i4_blk = 0; i4_blk < 16; i4_blk++)
+    {
+        /* a block with any non-zero count sets the bit at its z-scan position */
+        if(pu1_nnz[i4_blk])
+        {
+            u4_csbp |= (1 << gau1_raster_to_zscan_map[i4_blk]);
+        }
+    }
+
+    return u4_csbp;
+}
+
+/* Compares two motion vector sets for the lists that e_pred_mode uses: list L1
+ * is compared unless the mode is L0, list L0 is compared unless the mode is
+ * L1. For each compared list the reference index and both mv components have
+ * to match. Returns 1 when every compared list matches, 0 otherwise. */
+static FORCEINLINE UWORD8 isvce_check_identical_mv(isvce_enc_pu_mv_t *ps_mv1,
+                                                   isvce_enc_pu_mv_t *ps_mv2,
+                                                   PRED_MODE_T e_pred_mode)
+{
+    if((e_pred_mode != L0) && ((ps_mv1[L1].i1_ref_idx != ps_mv2[L1].i1_ref_idx) ||
+                               (ps_mv1[L1].s_mv.i2_mvx != ps_mv2[L1].s_mv.i2_mvx) ||
+                               (ps_mv1[L1].s_mv.i2_mvy != ps_mv2[L1].s_mv.i2_mvy)))
+    {
+        return 0;
+    }
+
+    if((e_pred_mode != L1) && ((ps_mv1[L0].i1_ref_idx != ps_mv2[L0].i1_ref_idx) ||
+                               (ps_mv1[L0].s_mv.i2_mvx != ps_mv2[L0].s_mv.i2_mvx) ||
+                               (ps_mv1[L0].s_mv.i2_mvy != ps_mv2[L0].s_mv.i2_mvy)))
+    {
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Function wrapper around the GET_NUM_BITS macro for a bitstream context */
+static FORCEINLINE WORD32 isvce_get_num_bits(bitstrm_t *ps_bitstream)
+{
+    return GET_NUM_BITS(ps_bitstream);
+}
+
+extern WORD32 ih264e_get_min_level(WORD32 wd, WORD32 ht);
+
+extern WORD32 isvce_svc_au_props_validate(svc_inp_params_t *ps_svc_inp_params, UWORD32 u4_inp_wd,
+                                          UWORD32 u4_inp_ht, UWORD32 u4_svc_comp_wd,
+                                          UWORD32 u4_svc_comp_ht);
+
+extern WORD32 isvce_svc_inp_params_validate(isvce_init_ip_t *ps_ip, isvce_cfg_params_t *ps_cfg);
+
+extern WORD32 isvce_get_total_svc_au_buf_size(svc_inp_params_t *ps_svc_inp_params,
+                                              WORD32 i4_pic_size, WORD32 i4_level,
+                                              WORD32 i4_horz_pad, WORD32 i4_vert_pad,
+                                              WORD32 i4_num_ref_frames,
+                                              WORD32 i4_num_reorder_frames);
+
+extern UWORD32 isvce_get_total_svc_au_data_size(WORD32 i4_num_luma_samples,
+                                                UWORD8 u1_num_spatial_layers,
+                                                DOUBLE d_spatial_res_ratio);
+
+extern IH264E_ERROR_T isvce_svc_au_data_mgr_add_bufs(isvce_codec_t *ps_codec);
+
+extern IH264E_ERROR_T
isvce_svc_au_buf_mgr_add_bufs(isvce_codec_t *ps_codec); + +extern UWORD32 isvce_get_svc_inp_buf_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, + UWORD32 u4_wd, UWORD32 u4_ht); + +extern void isvce_svc_inp_buf_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec); + +extern void isvce_init_svc_dimension(isvce_inp_buf_t *ps_inp); + +extern void isvce_svc_inp_buf_populate(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp); + +extern void isvce_get_svc_compliant_dimensions(UWORD8 u1_num_spatial_layers, + DOUBLE d_scaling_factor, UWORD32 u4_wd, + UWORD32 u4_ht, UWORD32 *pu4_svc_comp_wd, + UWORD32 *pu4_svc_comp_ht); + +extern UWORD32 isvce_get_svc_nbr_info_buf_size(UWORD8 u1_num_spatial_layers, + DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, + UWORD32 u4_ht); + +extern void isvce_svc_nbr_info_buf_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec); + +extern IH264E_ERROR_T isvce_svc_au_init(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp_buf); + +extern IH264E_ERROR_T isvce_svc_layer_pic_init(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp_buf, + UWORD8 u1_spatial_layer_id); + +extern IH264E_ERROR_T isvce_init_layer_proc_ctxt(isvce_process_ctxt_t *ps_proc); + +extern UWORD32 isvce_get_svc_ilp_buf_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, + UWORD32 u4_wd, UWORD32 u4_ht); + +extern void isvce_svc_ilp_buf_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec); + +extern void isvce_svc_ilp_buf_update(isvce_process_ctxt_t *ps_proc); + +extern void isvce_svc_pad_frame(isvce_process_ctxt_t *ps_proc); + +extern IH264E_ERROR_T isvce_init_air_map(isvce_codec_t *ps_codec); + +extern void isvce_derive_nghbr_avbl_of_mbs(isvce_process_ctxt_t *ps_proc); + +extern void isvce_init_quant_params(isvce_process_ctxt_t *ps_proc, WORD32 qp); + +extern IH264E_ERROR_T isvce_codec_init(isvce_codec_t *ps_codec); + +extern IH264E_ERROR_T isvce_codec_update_config(isvce_codec_t *ps_codec, + isvce_cfg_params_t *ps_cfg); + +extern WORD32 
isvce_input_queue_update(isvce_codec_t *ps_codec, ive_video_encode_ip_t *ps_ive_ip, + isvce_inp_buf_t *ps_enc_buff, WORD8 i1_layer_id); + +extern void isvce_join_threads(isvce_codec_t *ps_codec); + +#endif diff --git a/encoder/svc/libsvcenc.cmake b/encoder/svc/libsvcenc.cmake new file mode 100644 index 0000000..91dda45 --- /dev/null +++ b/encoder/svc/libsvcenc.cmake @@ -0,0 +1,127 @@ +list( + APPEND + LIBSVCENC_SRCS + "${AVC_ROOT}/encoder/ih264e_bitstream.c" + "${AVC_ROOT}/encoder/ih264e_cabac_init.c" + "${AVC_ROOT}/encoder/ih264e_core_coding.c" + "${AVC_ROOT}/encoder/ih264e_encode_header.c" + "${AVC_ROOT}/encoder/ih264e_fmt_conv.c" + "${AVC_ROOT}/encoder/ih264e_globals.c" + "${AVC_ROOT}/encoder/ih264e_half_pel.c" + "${AVC_ROOT}/encoder/ih264e_intra_modes_eval.c" + "${AVC_ROOT}/encoder/ih264e_mc.c" + "${AVC_ROOT}/encoder/ih264e_me.c" + "${AVC_ROOT}/encoder/ih264e_modify_frm_rate.c" + "${AVC_ROOT}/encoder/ih264e_rate_control.c" + "${AVC_ROOT}/encoder/ih264e_rc_mem_interface.c" + "${AVC_ROOT}/encoder/ih264e_sei.c" + "${AVC_ROOT}/encoder/ih264e_time_stamp.c" + "${AVC_ROOT}/encoder/ih264e_utils.c" + "${AVC_ROOT}/encoder/ih264e_version.c" + "${AVC_ROOT}/encoder/ime.c" + "${AVC_ROOT}/encoder/ime_distortion_metrics.c" + "${AVC_ROOT}/encoder/irc_bit_allocation.c" + "${AVC_ROOT}/encoder/irc_cbr_buffer_control.c" + "${AVC_ROOT}/encoder/irc_est_sad.c" + "${AVC_ROOT}/encoder/irc_fixed_point_error_bits.c" + "${AVC_ROOT}/encoder/irc_frame_info_collector.c" + "${AVC_ROOT}/encoder/irc_mb_model_based.c" + "${AVC_ROOT}/encoder/irc_picture_type.c" + "${AVC_ROOT}/encoder/irc_rate_control_api.c" + "${AVC_ROOT}/encoder/irc_rd_model.c" + "${AVC_ROOT}/encoder/irc_vbr_storage_vbv.c" + "${AVC_ROOT}/encoder/irc_vbr_str_prms.c" + "${AVC_ROOT}/encoder/svc/irc_svc_rate_control_api.c" + "${AVC_ROOT}/encoder/svc/isvce_api.c" + "${AVC_ROOT}/encoder/svc/isvce_cabac.c" + "${AVC_ROOT}/encoder/svc/isvce_cabac_encode.c" + "${AVC_ROOT}/encoder/svc/isvce_cabac_init.c" + 
"${AVC_ROOT}/encoder/svc/isvce_cavlc.c" + "${AVC_ROOT}/encoder/svc/isvce_core_coding.c" + "${AVC_ROOT}/encoder/svc/isvce_deblk.c" + "${AVC_ROOT}/encoder/svc/isvce_downscaler.c" + "${AVC_ROOT}/encoder/svc/isvce_encode.c" + "${AVC_ROOT}/encoder/svc/isvce_encode_header.c" + "${AVC_ROOT}/encoder/svc/isvce_fmt_conv.c" + "${AVC_ROOT}/encoder/svc/isvce_function_selector_generic.c" + "${AVC_ROOT}/encoder/svc/isvce_globals.c" + "${AVC_ROOT}/encoder/svc/isvce_ibl_eval.c" + "${AVC_ROOT}/encoder/svc/isvce_ilp_mv.c" + "${AVC_ROOT}/encoder/svc/isvce_intra_modes_eval.c" + "${AVC_ROOT}/encoder/svc/isvce_mc.c" + "${AVC_ROOT}/encoder/svc/isvce_me.c" + "${AVC_ROOT}/encoder/svc/isvce_mode_stat_visualiser.c" + "${AVC_ROOT}/encoder/svc/isvce_nalu_stat_aggregator.c" + "${AVC_ROOT}/encoder/svc/isvce_process.c" + "${AVC_ROOT}/encoder/svc/isvce_rate_control.c" + "${AVC_ROOT}/encoder/svc/isvce_rc_mem_interface.c" + "${AVC_ROOT}/encoder/svc/isvce_rc_utils.c" + "${AVC_ROOT}/encoder/svc/isvce_residual_pred.c" + "${AVC_ROOT}/encoder/svc/isvce_sub_pic_rc.c" + "${AVC_ROOT}/encoder/svc/isvce_utils.c" + "${AVC_ROOT}/encoder/psnr.c") + +include_directories(${AVC_ROOT}/encoder) +include_directories(${AVC_ROOT}/encoder/svc) + +if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR + "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch32") + list( + APPEND + LIBSVCENC_ASMS + "${AVC_ROOT}/encoder/arm/svc/isvce_function_selector.c" + "${AVC_ROOT}/encoder/arm/svc/isvce_function_selector_a9q.c" + "${AVC_ROOT}/encoder/arm/svc/isvce_function_selector_av8.c" + "${AVC_ROOT}/encoder/arm/svc/isvce_downscaler_neon.c" + "${AVC_ROOT}/encoder/arm/svc/isvce_rc_utils_neon.c" + "${AVC_ROOT}/encoder/arm/svc/isvce_residual_pred_neon.c") + + include_directories(${AVC_ROOT}/encoder/arm) + include_directories(${AVC_ROOT}/encoder/arm/svc) +endif() + +if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") + list( + APPEND + LIBSVCENC_ASMS + "${AVC_ROOT}/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s" + 
"${AVC_ROOT}/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s" + "${AVC_ROOT}/encoder/armv8/ih264e_half_pel_av8.s" + "${AVC_ROOT}/encoder/armv8/ime_distortion_metrics_av8.s") + + include_directories(${AVC_ROOT}/encoder/armv8) +elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch32") + list( + APPEND + LIBSVCENC_ASMS + "${AVC_ROOT}/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s" + "${AVC_ROOT}/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s" + "${AVC_ROOT}/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s" + "${AVC_ROOT}/encoder/arm/ih264e_fmt_conv.s" + "${AVC_ROOT}/encoder/arm/ih264e_half_pel.s" + "${AVC_ROOT}/encoder/arm/ime_distortion_metrics_a9q.s") +else() + list( + APPEND + LIBSVCENC_SRCS + "${AVC_ROOT}/encoder/x86/ih264e_function_selector.c" + "${AVC_ROOT}/encoder/x86/ih264e_function_selector_sse42.c" + "${AVC_ROOT}/encoder/x86/ih264e_function_selector_ssse3.c" + "${AVC_ROOT}/encoder/x86/ih264e_half_pel_ssse3.c" + "${AVC_ROOT}/encoder/x86/ih264e_intra_modes_eval_ssse3.c" + "${AVC_ROOT}/encoder/x86/ime_distortion_metrics_sse42.c" + "${AVC_ROOT}/encoder/x86/svc/isvce_downscaler_sse42.c" + "${AVC_ROOT}/encoder/x86/svc/isvce_function_selector.c" + "${AVC_ROOT}/encoder/x86/svc/isvce_function_selector_sse42.c" + "${AVC_ROOT}/encoder/x86/svc/isvce_function_selector_ssse3.c" + "${AVC_ROOT}/encoder/x86/svc/isvce_rc_utils_sse42.c" + "${AVC_ROOT}/encoder/x86/svc/isvce_residual_pred_sse42.c") + + include_directories(${AVC_ROOT}/encoder/x86) + include_directories(${AVC_ROOT}/encoder/x86/svc) +endif() + +add_library(libsvcenc STATIC ${LIBAVC_COMMON_SRCS} ${LIBAVC_COMMON_ASMS} + ${LIBSVCENC_SRCS} ${LIBSVCENC_ASMS}) + +target_compile_definitions(libsvcenc PRIVATE N_MB_ENABLE) diff --git a/encoder/x86/svc/isvce_downscaler_sse42.c b/encoder/x86/svc/isvce_downscaler_sse42.c new file mode 100644 index 0000000..0055590 --- /dev/null +++ b/encoder/x86/svc/isvce_downscaler_sse42.c @@ -0,0 +1,652 @@ 
+/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file isvce_downscaler_sse42.c +* +* @brief +* This file contains the x86 SIMD version of the function which does +* horizontal scaling and transpose +* +* @author +* Ittiam +* +* @par List of Functions: +* - isvce_horizontal_downscale_and_transpose_sse42() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "isvc_macros.h" +#include "ih264_platform_macros.h" +#include "isvc_defs.h" +#include "isvce_defs.h" +#include "isvc_structs.h" +#include "isvce_downscaler_private_defs.h" + +/*****************************************************************************/ +/* Function Definitions */ 
+/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* horizontal scaler function +* +* @par Description: +* Does horizontal scaling for the given block +* +* @param[in] ps_scaler +* pointer to downscaler context +* +* @param[in] ps_src +* pointer to source buffer container +* +* @param[in] ps_dst +* pointer to destination buffer container +* +* @param[in] pai1_filters +* pointer to array of downscaler filters +* +* @param[in] u4_blk_wd +* width of the block after horizontal scaling (output block width) +* +* @param[in] u4_blk_ht +* height of the current block (input block height) +* +* @param[in] u1_is_chroma +* flag suggesting whether the buffer is luma or chroma +* +* +* @returns +* +* @remarks +* The same function is used for vertical scaling too as +* the horizontally scaled input in stored in transpose fashion. +* +******************************************************************************* +*/ + +void isvce_horizontal_downscale_and_transpose_sse42( + downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst, + FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma) +{ + WORD32 i, j; + UWORD8 u1_phase; + UWORD8 *pu1_src_j, *pu1_dst_j; + WORD32 i4_temp_pixel_holder; + UWORD32 u4_num_iterations_vertical_by_16; + UWORD32 u4_rem_vert_loop; + UWORD8 *pu1_in_pixel; + UWORD8 *pu1_out_pixel; + WORD8 *pi1_filter_for_grid; + UWORD16 u2_full_pixel_inc; + + __m128i src_temp_0, src_temp_1, src_temp_2, src_temp_3, src_temp_4, src_temp_5, src_temp_6, + src_temp_7; + + __m128i reg_all_1s, reg_64val_32bit, reg_all_0s, filt_coeff_grid, reg_shuffle; + + __m128i reg_01_16x8b, reg_02_16x8b, reg_03_16x8b, reg_04_16x8b, reg_05_16x8b; + + downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state; + + UWORD32 u4_center_pixel_pos = 
ps_scaler_state->i4_init_offset; + UWORD32 u4_src_vert_increments = ps_scaler_state->u4_vert_increment; + UWORD32 u4_src_horz_increments = ps_scaler_state->u4_horz_increment; + + UWORD8 *pu1_src = ps_src->pv_data; + UWORD32 u4_in_stride = ps_src->i4_data_stride; + UWORD8 *pu1_dst = ps_dst->pv_data; + UWORD32 u4_out_stride = ps_dst->i4_data_stride; + UWORD32 u4_center_pixel_pos_src = u4_center_pixel_pos; + + ASSERT((1 << DOWNSCALER_Q) == u4_src_vert_increments); + + reg_all_1s = _mm_set1_epi16((short) 1); + reg_64val_32bit = _mm_set1_epi32((int) 64); + reg_all_0s = _mm_setzero_si128(); + reg_shuffle = _mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0); + + u4_num_iterations_vertical_by_16 = u4_blk_ht >> 4; + u4_rem_vert_loop = u4_blk_ht % 16; + + /* Offset the input so that the input pixel to be processed + co-incides with the centre of filter (4th coefficient)*/ + pu1_src += (1 + u1_is_chroma); + + if(!u1_is_chroma) + { + for(j = 0; j < (WORD32) u4_num_iterations_vertical_by_16; j++) + { + pu1_src_j = pu1_src + ((j << 4) * u4_in_stride); + pu1_dst_j = pu1_dst + (j << 4); + + u4_center_pixel_pos = u4_center_pixel_pos_src; + + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_for_grid = pai1_filters[u1_phase]; + + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); + + filt_coeff_grid = _mm_loadu_si128((__m128i *) pi1_filter_for_grid); + /******************************************************/ + /* This loop is going vertically in bottom direction */ + /* but the output pixels are stored in horizontal */ + /* direction in transpose manner */ + /******************************************************/ + + /*For row 0,1*/ + src_temp_0 = _mm_loadl_epi64((__m128i *) pu1_in_pixel); + src_temp_1 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride)); + 
/*next transfer the 8 pixels from temp_2 to temp_1 higher bits 64-127*/ + src_temp_0 = _mm_unpacklo_epi64(src_temp_0, src_temp_1); + + /*For row 2,3*/ + src_temp_2 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 2)); + + src_temp_3 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 3)); + + src_temp_2 = _mm_unpacklo_epi64(src_temp_2, src_temp_3); + + reg_01_16x8b = _mm_maddubs_epi16(src_temp_0, filt_coeff_grid); + + /*multiply with filter coeffs to get 16 bit results*/ + reg_02_16x8b = _mm_maddubs_epi16(src_temp_2, filt_coeff_grid); + + reg_01_16x8b = _mm_hadd_epi16(reg_01_16x8b, reg_02_16x8b); + /*add adjacent 16 bit values to get 32 bit values*/ + reg_01_16x8b = _mm_madd_epi16(reg_01_16x8b, reg_all_1s); + + /*Add offset of 64 for rounding each out pixel value*/ + reg_01_16x8b = _mm_add_epi32(reg_01_16x8b, reg_64val_32bit); + /*Divide by 128 each out pixel value*/ + reg_01_16x8b = _mm_srli_epi32(reg_01_16x8b, 7); + + /*For row 4,5*/ + src_temp_4 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 4)); + + src_temp_5 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 5)); + + src_temp_4 = _mm_unpacklo_epi64(src_temp_4, src_temp_5); + + /*For row 6,7*/ + src_temp_6 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 6)); + + src_temp_7 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 7)); + + src_temp_6 = _mm_unpacklo_epi64(src_temp_6, src_temp_7); + + reg_03_16x8b = _mm_maddubs_epi16(src_temp_4, filt_coeff_grid); + + reg_04_16x8b = _mm_maddubs_epi16(src_temp_6, filt_coeff_grid); + + reg_03_16x8b = _mm_hadd_epi16(reg_03_16x8b, reg_04_16x8b); + + reg_03_16x8b = _mm_madd_epi16(reg_03_16x8b, reg_all_1s); + + /*next add 2 adjacent 32 bit values to get a single 32 bit + **value in each row + */ + + /*Add offset of 64 for rounding each out pixel value*/ + reg_03_16x8b = _mm_add_epi32(reg_03_16x8b, reg_64val_32bit); + /*Divide by 128 each out pixel value*/ + reg_03_16x8b = _mm_srli_epi32(reg_03_16x8b, 7); + + 
/*pack the lower 16 bit values corresponding to the 8 output + pixels from reg1 and reg 2*/ + reg_01_16x8b = _mm_packus_epi32(reg_01_16x8b, reg_03_16x8b); + + /*For row 8,9*/ + src_temp_0 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + 8 * u4_in_stride)); + + src_temp_1 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + 9 * u4_in_stride)); + + /*next transfer the 8 pixels from temp_2 to temp_1 higher bits 64-127*/ + src_temp_0 = _mm_unpacklo_epi64(src_temp_0, src_temp_1); + + /*For row 10,11*/ + src_temp_2 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 10)); + + src_temp_3 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 11)); + + src_temp_2 = _mm_unpacklo_epi64(src_temp_2, src_temp_3); + + reg_02_16x8b = _mm_maddubs_epi16(src_temp_0, filt_coeff_grid); + + /*multiply with filter coeffs to get 16 bit results*/ + reg_03_16x8b = _mm_maddubs_epi16(src_temp_2, filt_coeff_grid); + + reg_02_16x8b = _mm_hadd_epi16(reg_02_16x8b, reg_03_16x8b); + /*add adjacent 16 bit values to get 32 bit values*/ + reg_02_16x8b = _mm_madd_epi16(reg_02_16x8b, reg_all_1s); + + /*next add 2 adjacent 32 bit values to get a single + 32 bit value in each row*/ + + /*Add offset of 64 for rounding each out pixel value*/ + reg_02_16x8b = _mm_add_epi32(reg_02_16x8b, reg_64val_32bit); + /*Divide by 128 each out pixel value*/ + reg_02_16x8b = _mm_srli_epi32(reg_02_16x8b, 7); + + /*For row 12,13*/ + src_temp_4 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 12)); + + src_temp_5 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 13)); + + src_temp_4 = _mm_unpacklo_epi64(src_temp_4, src_temp_5); + + /*For row 14,15*/ + src_temp_6 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 14)); + + src_temp_7 = _mm_loadl_epi64((__m128i *) (pu1_in_pixel + u4_in_stride * 15)); + + src_temp_6 = _mm_unpacklo_epi64(src_temp_6, src_temp_7); + + reg_04_16x8b = _mm_maddubs_epi16(src_temp_4, filt_coeff_grid); + + reg_05_16x8b = _mm_maddubs_epi16(src_temp_6, 
filt_coeff_grid); + + reg_04_16x8b = _mm_hadd_epi16(reg_04_16x8b, reg_05_16x8b); + /*add adjacent 16 bit values to get 32 bit values*/ + reg_04_16x8b = _mm_madd_epi16(reg_04_16x8b, reg_all_1s); + + /*next add 2 adjacent 32 bit values to get a single + 32 bit value in each row*/ + + /*Add offset of 64 for rounding each out pixel value*/ + reg_04_16x8b = _mm_add_epi32(reg_04_16x8b, reg_64val_32bit); + /*Divide by 128 each out pixel value*/ + reg_04_16x8b = _mm_srli_epi32(reg_04_16x8b, 7); + + /*pack the lower 16 bit values corresponding to the 8 output + pixels from reg1 and reg 2*/ + reg_02_16x8b = _mm_packus_epi32(reg_02_16x8b, reg_04_16x8b); + + /*next get saturated 8 bit output pixel values for row 0-15*/ + reg_01_16x8b = _mm_packus_epi16(reg_01_16x8b, reg_02_16x8b); + + /*Store the 16 output values*/ + _mm_storeu_si128((__m128i *) pu1_out_pixel, reg_01_16x8b); + + pu1_out_pixel += 16; + + pu1_in_pixel += ((u4_src_vert_increments * (u4_in_stride << 4)) >> DOWNSCALER_Q); + + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + + /*if height is not a multiple of 8 process 2 rows at a + time for the remaining rows*/ + if(u4_rem_vert_loop) + { + pu1_src_j = pu1_src + ((j << 4) * u4_in_stride); + pu1_dst_j = pu1_dst + (j << 4); + + u4_center_pixel_pos = u4_center_pixel_pos_src; + + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_for_grid = pai1_filters[u1_phase]; + + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); + + filt_coeff_grid = _mm_loadu_si128((__m128i *) pi1_filter_for_grid); + + for(j = u4_rem_vert_loop; j > 0; j--) + { + src_temp_0 = _mm_loadl_epi64((__m128i const *) pu1_in_pixel); + + src_temp_0 = _mm_maddubs_epi16(src_temp_0, filt_coeff_grid); + + src_temp_0 = _mm_madd_epi16(src_temp_0, reg_all_1s); 
+ + reg_01_16x8b = _mm_hadd_epi32(src_temp_0, reg_all_0s); + + /*Add offset of 64 for rounding each out pixel value*/ + reg_01_16x8b = _mm_add_epi32(reg_01_16x8b, reg_64val_32bit); + /*Divide by 128 each out pixel value*/ + reg_01_16x8b = _mm_srli_epi32(reg_01_16x8b, (int) 7); + + reg_01_16x8b = _mm_packus_epi32(reg_01_16x8b, reg_all_0s); + + /*next get saturated 8 bit output pixel values*/ + reg_01_16x8b = _mm_packus_epi16(reg_01_16x8b, reg_all_0s); + + /*Store the 1 output value*/ + *pu1_out_pixel = (UWORD8) _mm_cvtsi128_si32(reg_01_16x8b); + + pu1_in_pixel += (u4_src_vert_increments * u4_in_stride) >> DOWNSCALER_Q; + + pu1_out_pixel++; + } + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + } + + else /* for chroma */ + { + for(j = 0; j < (WORD32) u4_num_iterations_vertical_by_16; j++) + { + pu1_src_j = pu1_src + ((j << 4) * u4_in_stride); + pu1_dst_j = pu1_dst + (j << 4); + + u4_center_pixel_pos = u4_center_pixel_pos_src; + + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_for_grid = pai1_filters[u1_phase]; + + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); + + filt_coeff_grid = _mm_loadu_si128((__m128i *) pi1_filter_for_grid); + /******************************************************/ + /* This loop is going vertically in bottom direction */ + /* but the output pixels are stored in horizontal */ + /* direction in transpose manner */ + /******************************************************/ + + /*Load 16 values shuffle to separate Cb and Cr and process*/ + + src_temp_0 = _mm_loadu_si128((__m128i *) pu1_in_pixel); + src_temp_1 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride)); + + src_temp_2 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 2)); + + src_temp_3 = 
_mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 3)); + + src_temp_0 = _mm_shuffle_epi8(src_temp_0, reg_shuffle); + src_temp_1 = _mm_shuffle_epi8(src_temp_1, reg_shuffle); + src_temp_2 = _mm_shuffle_epi8(src_temp_2, reg_shuffle); + src_temp_3 = _mm_shuffle_epi8(src_temp_3, reg_shuffle); + + reg_01_16x8b = _mm_maddubs_epi16(src_temp_0, filt_coeff_grid); + reg_02_16x8b = _mm_maddubs_epi16(src_temp_1, filt_coeff_grid); + + reg_01_16x8b = _mm_hadd_epi16(reg_01_16x8b, reg_02_16x8b); + + reg_01_16x8b = _mm_madd_epi16(reg_01_16x8b, reg_all_1s); + + reg_01_16x8b = _mm_add_epi32(reg_01_16x8b, reg_64val_32bit); + + reg_01_16x8b = _mm_srli_epi32(reg_01_16x8b, (int) 7); + + reg_03_16x8b = _mm_maddubs_epi16(src_temp_2, filt_coeff_grid); + reg_04_16x8b = _mm_maddubs_epi16(src_temp_3, filt_coeff_grid); + + src_temp_4 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 4)); + + src_temp_5 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 5)); + + src_temp_6 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 6)); + + src_temp_7 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 7)); + + src_temp_4 = _mm_shuffle_epi8(src_temp_4, reg_shuffle); + src_temp_5 = _mm_shuffle_epi8(src_temp_5, reg_shuffle); + src_temp_6 = _mm_shuffle_epi8(src_temp_6, reg_shuffle); + src_temp_7 = _mm_shuffle_epi8(src_temp_7, reg_shuffle); + + reg_03_16x8b = _mm_hadd_epi16(reg_03_16x8b, reg_04_16x8b); + + reg_03_16x8b = _mm_madd_epi16(reg_03_16x8b, reg_all_1s); + + reg_03_16x8b = _mm_add_epi32(reg_03_16x8b, reg_64val_32bit); + + reg_03_16x8b = _mm_srli_epi32(reg_03_16x8b, (int) 7); + + reg_01_16x8b = _mm_packus_epi32(reg_01_16x8b, reg_03_16x8b); + + reg_02_16x8b = _mm_maddubs_epi16(src_temp_4, filt_coeff_grid); + reg_04_16x8b = _mm_maddubs_epi16(src_temp_5, filt_coeff_grid); + + reg_02_16x8b = _mm_hadd_epi16(reg_02_16x8b, reg_04_16x8b); + + reg_02_16x8b = _mm_madd_epi16(reg_02_16x8b, reg_all_1s); + + reg_02_16x8b = _mm_add_epi32(reg_02_16x8b, reg_64val_32bit); 
+ + reg_02_16x8b = _mm_srli_epi32(reg_02_16x8b, (int) 7); + + reg_03_16x8b = _mm_maddubs_epi16(src_temp_6, filt_coeff_grid); + reg_04_16x8b = _mm_maddubs_epi16(src_temp_7, filt_coeff_grid); + + reg_03_16x8b = _mm_hadd_epi16(reg_03_16x8b, reg_04_16x8b); + + reg_03_16x8b = _mm_madd_epi16(reg_03_16x8b, reg_all_1s); + + reg_03_16x8b = _mm_add_epi32(reg_03_16x8b, reg_64val_32bit); + + reg_03_16x8b = _mm_srli_epi32(reg_03_16x8b, (int) 7); + + reg_02_16x8b = _mm_packus_epi32(reg_02_16x8b, reg_03_16x8b); + + reg_01_16x8b = _mm_packus_epi16(reg_01_16x8b, reg_02_16x8b); + + reg_01_16x8b = _mm_shuffle_epi8(reg_01_16x8b, reg_shuffle); + + src_temp_0 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + 8 * u4_in_stride)); + + src_temp_1 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + 9 * u4_in_stride)); + + src_temp_2 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 10)); + + src_temp_3 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 11)); + + src_temp_0 = _mm_shuffle_epi8(src_temp_0, reg_shuffle); + src_temp_1 = _mm_shuffle_epi8(src_temp_1, reg_shuffle); + src_temp_2 = _mm_shuffle_epi8(src_temp_2, reg_shuffle); + src_temp_3 = _mm_shuffle_epi8(src_temp_3, reg_shuffle); + + reg_02_16x8b = _mm_maddubs_epi16(src_temp_0, filt_coeff_grid); + reg_03_16x8b = _mm_maddubs_epi16(src_temp_1, filt_coeff_grid); + + reg_02_16x8b = _mm_hadd_epi16(reg_02_16x8b, reg_03_16x8b); + + reg_02_16x8b = _mm_madd_epi16(reg_02_16x8b, reg_all_1s); + + reg_02_16x8b = _mm_add_epi32(reg_02_16x8b, reg_64val_32bit); + + reg_02_16x8b = _mm_srli_epi32(reg_02_16x8b, (int) 7); + + reg_04_16x8b = _mm_maddubs_epi16(src_temp_2, filt_coeff_grid); + reg_05_16x8b = _mm_maddubs_epi16(src_temp_3, filt_coeff_grid); + + src_temp_4 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 12)); + + src_temp_5 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 13)); + + src_temp_6 = _mm_loadu_si128((__m128i *) (pu1_in_pixel + u4_in_stride * 14)); + + src_temp_7 = _mm_loadu_si128((__m128i *) 
(pu1_in_pixel + u4_in_stride * 15)); + + src_temp_4 = _mm_shuffle_epi8(src_temp_4, reg_shuffle); + src_temp_5 = _mm_shuffle_epi8(src_temp_5, reg_shuffle); + src_temp_6 = _mm_shuffle_epi8(src_temp_6, reg_shuffle); + src_temp_7 = _mm_shuffle_epi8(src_temp_7, reg_shuffle); + + reg_04_16x8b = _mm_hadd_epi16(reg_04_16x8b, reg_05_16x8b); + + reg_04_16x8b = _mm_madd_epi16(reg_04_16x8b, reg_all_1s); + + reg_04_16x8b = _mm_add_epi32(reg_04_16x8b, reg_64val_32bit); + + reg_04_16x8b = _mm_srli_epi32(reg_04_16x8b, (int) 7); + + reg_02_16x8b = _mm_packus_epi32(reg_02_16x8b, reg_04_16x8b); + + reg_03_16x8b = _mm_maddubs_epi16(src_temp_4, filt_coeff_grid); + reg_05_16x8b = _mm_maddubs_epi16(src_temp_5, filt_coeff_grid); + + reg_03_16x8b = _mm_hadd_epi16(reg_03_16x8b, reg_05_16x8b); + + reg_03_16x8b = _mm_madd_epi16(reg_03_16x8b, reg_all_1s); + + reg_03_16x8b = _mm_add_epi32(reg_03_16x8b, reg_64val_32bit); + + reg_03_16x8b = _mm_srli_epi32(reg_03_16x8b, (int) 7); + + reg_04_16x8b = _mm_maddubs_epi16(src_temp_6, filt_coeff_grid); + reg_05_16x8b = _mm_maddubs_epi16(src_temp_7, filt_coeff_grid); + + reg_04_16x8b = _mm_hadd_epi16(reg_04_16x8b, reg_05_16x8b); + + reg_04_16x8b = _mm_madd_epi16(reg_04_16x8b, reg_all_1s); + + reg_04_16x8b = _mm_add_epi32(reg_04_16x8b, reg_64val_32bit); + + reg_04_16x8b = _mm_srli_epi32(reg_04_16x8b, (int) 7); + + reg_03_16x8b = _mm_packus_epi32(reg_03_16x8b, reg_04_16x8b); + + reg_02_16x8b = _mm_packus_epi16(reg_02_16x8b, reg_03_16x8b); + + reg_02_16x8b = _mm_shuffle_epi8(reg_02_16x8b, reg_shuffle); + + reg_03_16x8b = _mm_unpacklo_epi64(reg_01_16x8b, reg_02_16x8b); + + reg_04_16x8b = _mm_unpackhi_epi64(reg_01_16x8b, reg_02_16x8b); + + /*Storing after shuffling again*/ + + _mm_storeu_si128((__m128i *) pu1_out_pixel, reg_03_16x8b); + _mm_storeu_si128((__m128i *) (pu1_out_pixel + u4_out_stride), reg_04_16x8b); + + pu1_out_pixel += 16; + + pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride << 4)) >> DOWNSCALER_Q; + + /* Update the context for next Loop 
Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + + /*if height is not a multiple of 8 process 2 rows at a + time for the remaining rows*/ + if(u4_rem_vert_loop) + { + pu1_src_j = pu1_src + ((j << 4) * u4_in_stride); + pu1_dst_j = pu1_dst + (j << 4); + + u4_center_pixel_pos = u4_center_pixel_pos_src; + for(i = 0; i < (WORD32) u4_blk_wd; i++) + { + UWORD8 u1_phase = get_filter_phase(u4_center_pixel_pos); + pi1_filter_for_grid = pai1_filters[u1_phase]; + + u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q; + + pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma); + + pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride); + + filt_coeff_grid = _mm_loadu_si128((__m128i *) pi1_filter_for_grid); + + for(j = u4_rem_vert_loop; j > 0; j = j - 2) + { + src_temp_0 = _mm_loadu_si128((__m128i const *) pu1_in_pixel); + src_temp_0 = _mm_shuffle_epi8(src_temp_0, reg_shuffle); + + src_temp_1 = _mm_loadu_si128((__m128i const *) (pu1_in_pixel + u4_in_stride)); + + src_temp_1 = _mm_shuffle_epi8(src_temp_1, reg_shuffle); + + src_temp_0 = _mm_maddubs_epi16(src_temp_0, filt_coeff_grid); + src_temp_1 = _mm_maddubs_epi16(src_temp_1, filt_coeff_grid); + + reg_01_16x8b = _mm_hadd_epi16(src_temp_0, src_temp_1); + + reg_01_16x8b = _mm_madd_epi16(reg_01_16x8b, reg_all_1s); + + /*Add offset of 64 for rounding each out pixel value*/ + reg_01_16x8b = _mm_add_epi32(reg_01_16x8b, reg_64val_32bit); + /*Divide by 128 each out pixel value*/ + reg_01_16x8b = _mm_srli_epi32(reg_01_16x8b, (int) 7); + + reg_01_16x8b = _mm_packus_epi32(reg_01_16x8b, reg_all_0s); + + /*next get saturated 8 bit output pixel values*/ + reg_01_16x8b = _mm_packus_epi16(reg_01_16x8b, reg_all_0s); + + reg_01_16x8b = _mm_shuffle_epi8(reg_01_16x8b, reg_shuffle); + + reg_02_16x8b = _mm_srli_si128(reg_01_16x8b, (int) 8); + + /*Store the 2 output values*/ + i4_temp_pixel_holder = _mm_cvtsi128_si32(reg_01_16x8b); + + *pu1_out_pixel = (UWORD8) i4_temp_pixel_holder; + i4_temp_pixel_holder >>= 
8; + + *(pu1_out_pixel + 1) = (UWORD8) i4_temp_pixel_holder; + + i4_temp_pixel_holder = _mm_cvtsi128_si32(reg_02_16x8b); + + *(pu1_out_pixel + u4_out_stride) = (UWORD8) i4_temp_pixel_holder; + i4_temp_pixel_holder >>= 8; + + *(pu1_out_pixel + u4_out_stride + 1) = (UWORD8) i4_temp_pixel_holder; + + pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride << 1)) >> DOWNSCALER_Q; + pu1_out_pixel += 2; + } + /* Update the context for next Loop Count */ + u4_center_pixel_pos += u4_src_horz_increments; + } + } + } +} diff --git a/encoder/x86/svc/isvce_function_selector.c b/encoder/x86/svc/isvce_function_selector.c new file mode 100644 index 0000000..7ff22a2 --- /dev/null +++ b/encoder/x86/svc/isvce_function_selector.c @@ -0,0 +1,136 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ + +/** +******************************************************************************* +* @file +* isvce_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in h264 +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include Files */ +#include +#include +#include +#include + +/* User Include Files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvc_defs.h" +#include "ih264_size_defs.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "isvc_macros.h" +#include "ih264_platform_macros.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "isvce_cabac.h" +#include "isvce_platform_macros.h" + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* 
+******************************************************************************* +*/ +void isvce_init_function_ptr(isvce_codec_t *ps_codec) +{ + isvce_init_function_ptr_generic(ps_codec); + + switch(ps_codec->s_cfg.e_arch) + { + case ARCH_X86_GENERIC: + isvce_init_function_ptr_generic(ps_codec); + break; + case ARCH_X86_SSSE3: + isvce_init_function_ptr_ssse3(ps_codec); + break; + case ARCH_X86_SSE42: + default: + isvce_init_function_ptr_ssse3(ps_codec); + isvce_init_function_ptr_sse42(ps_codec); + break; + } +} + +/** +******************************************************************************* +* +* @brief Determine the architecture of the encoder executing environment +* +* @par Description: This routine returns the architecture of the enviro- +* ment in which the current encoder is being tested +* +* @param[in] void +* +* @returns IV_ARCH_T +* architecture +* +* @remarks none +* +******************************************************************************* +*/ +IV_ARCH_T isvce_default_arch(void) { return ARCH_X86_SSE42; } diff --git a/encoder/x86/svc/isvce_function_selector_sse42.c b/encoder/x86/svc/isvce_function_selector_sse42.c new file mode 100644 index 0000000..709155f --- /dev/null +++ b/encoder/x86/svc/isvce_function_selector_sse42.c @@ -0,0 +1,169 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvce_function_selector_sse42.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - isvce_init_function_ptr_sse42 +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include +#include +#include +#include + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvc_defs.h" +#include "ih264_size_defs.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "isvc_inter_pred_filters.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "isvce_cabac.h" +#include "ih264e_platform_macros.h" +#include "isvce_core_coding.h" +#include "ih264_cavlc_tables.h" +#include "isvce_cavlc.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_half_pel.h" + +/** +******************************************************************************* +* +* @brief Initialize the 
intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_init_function_ptr_sse42(isvce_codec_t *ps_codec) +{ + WORD32 i; + isvce_process_ctxt_t *ps_proc = NULL; + isvce_me_ctxt_t *ps_me_ctxt = NULL; + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns; + mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns; + + ps_enc_loop_fxns->pf_hadamard_quant_4x4 = isvc_hadamard_quant_4x4_sse42; + ps_enc_loop_fxns->pf_hadamard_quant_2x2_uv = isvc_hadamard_quant_2x2_uv_sse42; + + ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4_sse42; + ps_enc_loop_fxns->apf_resi_trans_quant_4x4[1] = isvc_resi_trans_quant_4x4_with_res_pred_sse42; + + ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4_sse42; + ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] = + isvc_resi_trans_quant_chroma_4x4_with_res_pred_sse42; + + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] = isvc_iquant_itrans_recon_res_4x4_sse42; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] = + isvc_iquant_itrans_recon_res_4x4_with_res_acc_sse42; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_sse42; + + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] = + isvc_iquant_itrans_recon_res_chroma_4x4_sse42; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] = + isvc_iquant_itrans_recon_res_chroma_4x4_with_res_acc_sse42; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] = + isvc_iquant_itrans_recon_chroma_4x4_sse42; + + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] = 
isvc_iquant_itrans_recon_res_dc_4x4_sse42; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] = + isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42; + ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_dc_4x4_sse42; + + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] = + isvc_iquant_itrans_recon_res_chroma_4x4_dc_sse42; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] = + isvc_iquant_itrans_recon_res_chroma_4x4_dc_with_res_acc_sse42; + ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] = + isvc_iquant_itrans_recon_chroma_4x4_dc_sse42; + + ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_sse42; + + /* sad me level functions */ + ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_sse42; + ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_sse42; + ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_sse42; + + ps_mem_fxns->pf_copy_2d = isvc_copy_2d_ssse3; + ps_mem_fxns->pf_memset_2d = isvc_memset_2d_sse42; + + /* sad me level functions */ + for(i = 0; i < (MAX_PROCESS_CTXT); i++) + { + ps_proc = &ps_codec->as_process[i]; + + ps_me_ctxt = &ps_proc->s_me_ctxt; + ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_sse42; + ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_sse42; + ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_sse42; + ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_sse42; + ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_sse42; + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_sse42; + } +} diff --git a/encoder/x86/svc/isvce_function_selector_ssse3.c b/encoder/x86/svc/isvce_function_selector_ssse3.c new file mode 100644 index 0000000..298b490 --- /dev/null +++ b/encoder/x86/svc/isvce_function_selector_ssse3.c @@ -0,0 +1,182 @@ +/****************************************************************************** + * + * 
Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** +******************************************************************************* +* @file +* isvce_function_selector_ssse3.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - isvce_init_function_ptr_ssse3 +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include +#include +#include +#include + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvc_defs.h" +#include "ih264_size_defs.h" +#include "isvce_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "isvc_structs.h" +#include "isvc_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "isvc_mem_fns.h" +#include "ih264_padding.h" 
+#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "isvc_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "isvce_rate_control.h" +#include "isvce_cabac_structs.h" +#include "isvce_structs.h" +#include "ih264e_platform_macros.h" +#include "isvce_cabac.h" +#include "isvce_core_coding.h" +#include "ih264_cavlc_tables.h" +#include "isvce_cavlc.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_half_pel.h" + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_init_function_ptr_ssse3(isvce_codec_t *ps_codec) +{ + isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns; + inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns; + mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_ssse3; + ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_ssse3; + ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_ssse3; + ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_ssse3; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_ssse3; + ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_ssse3; + 
ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_ssse3; + ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3; + ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3; + ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_ssse3; + ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_ssse3; + ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_ssse3; + ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_ssse3; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 8x8 */ + ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_ssse3; + ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_ssse3; + ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3; + ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3; + ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_ssse3; + ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_ssse3; + ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_ssse3; + ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_ssse3; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_ssse3; + ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_ssse3; + ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_ssse3; + + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_ssse3; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_ssse3; + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_ssse3; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_ssse3; + /* Init fn ptr chroma deblocking */ + 
ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_ssse3; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_ssse3; + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_ssse3; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_ssse3; + + /* Padding Functions */ + ps_codec->pf_pad_left_luma = ih264_pad_left_luma_ssse3; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_ssse3; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma_ssse3; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_ssse3; + + /* Inter pred leaf level functions */ + ps_inter_pred_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_ssse3; + ps_inter_pred_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_ssse3; + ps_inter_pred_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_ssse3; + ps_inter_pred_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma_ssse3; + + /* memory handling operations */ + ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_ssse3; + ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8_ssse3; + ps_mem_fxns->pf_copy_2d = isvc_copy_2d_ssse3; + + /*intra mode eval -encoder level function*/ + ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_ssse3; + ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_ssse3; + ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_ssse3; + + /* Half pel generation function - encoder level*/ + ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_ssse3; + ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_ssse3; +} diff --git a/encoder/x86/svc/isvce_platform_macros.h b/encoder/x86/svc/isvce_platform_macros.h new file mode 100644 index 0000000..f6e1ceb --- /dev/null +++ b/encoder/x86/svc/isvce_platform_macros.h @@ -0,0 +1,119 @@ +/****************************************************************************** + * + * Copyright (C)
2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/** + ******************************************************************************* + * @file + * isvce_platform_macros.h + * + * @brief + * Contains platform specific routines used for codec context initialization + * + * @author + * ittiam + * + * @remarks + * none + * + ******************************************************************************* + */ + +#ifndef _ISVCE_PLATFORM_MACROS_H_ +#define _ISVCE_PLATFORM_MACROS_H_ + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec); +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes
the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_init_function_ptr_ssse3(isvce_codec_t *ps_codec); +void isvce_init_function_ptr_sse42(isvce_codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void isvce_init_function_ptr(isvce_codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Determine the architecture of the encoder executing environment +* +* @par Description: This routine returns the architecture of the enviro- +* ment in which the current encoder is being tested +* +* @param[in] void +* +* @returns IV_ARCH_T +* architecture +* +* @remarks none +* +******************************************************************************* +*/ +IV_ARCH_T isvce_default_arch(void); + +#endif diff --git a/encoder/x86/svc/isvce_rc_utils_sse42.c b/encoder/x86/svc/isvce_rc_utils_sse42.c new file mode 100644 index 0000000..6444d72 --- /dev/null +++ b/encoder/x86/svc/isvce_rc_utils_sse42.c @@ -0,0 +1,450 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +****************************************************************************** +* @file isvce_rc_utils_sse42.c +* +* @brief +* This file contains the x86 SIMD version of the function which computes +* gradient per pixel value being used in Init Qp +* +* @author +* Ittiam +* +* @par List of Functions: +* - isvce_get_gpp_sse42() +* +* @remarks +* None +* +******************************************************************************* +*/ + +#include <immintrin.h> + +#include "ih264_typedefs.h" +#include "ih264_debug.h" +#include "isvc_structs.h" +#include "isvce_rc_utils_private_defs.h" + +/** +******************************************************************************* +* +* @brief +* get gpp function +* +* @par Description: +* computes gradient per pixel value for a given frame +* +* @param[in] ps_input_buf +* pointer to yuv buffer properties +* +* @returns +* calculated gpp value +* +* @remarks +* none +* +******************************************************************************* +*/ + +DOUBLE isvce_get_gpp_sse42(yuv_buf_props_t *ps_input_buf) +{ + UWORD8 *pu1_input_buf; + UWORD16 mask_ffff, mask_00ff; + UWORD32 i, j, k; + UWORD32 u4_width, u4_height, i4_input_stride; + DOUBLE d_gpp_y, d_gpp_u, d_gpp_v, d_gpp; + + __m128i u1_src_r0, u1_src_r1, u1_src_r2, u1_src_r3, u1_src_r4; + __m128i u1_src_right_r0, u1_src_right_r1, u1_src_right_r2, u1_src_right_r3; + __m128i
u2_sad_cur_bot_r01, u2_sad_cur_bot_r12, u2_sad_cur_bot_r23, u2_sad_cur_bot_r34; + __m128i u2_sad_cur_right_r0, u2_sad_cur_right_r1, u2_sad_cur_right_r2, u2_sad_cur_right_r3; + __m128i u2_sad_hadd, u1_shuffle_chroma, u2_mask_and_pixY, u2_mask_and_pixUV; + + d_gpp_y = 0; + d_gpp_u = 0; + d_gpp_v = 0; + d_gpp = 0; + mask_ffff = 0xffff; + mask_00ff = 0x00ff; + pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[0].pv_data; + i4_input_stride = ps_input_buf->as_component_bufs[0].i4_data_stride; + u4_width = ps_input_buf->u4_width; + u4_height = ps_input_buf->u4_height; + + u1_shuffle_chroma = _mm_setr_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x01, 0x03, + 0x05, 0x07, 0x09, 0x0b, 0x0d, 0x0f); + u2_mask_and_pixY = _mm_setr_epi16(mask_ffff, mask_ffff, mask_ffff, mask_ffff, mask_ffff, + mask_ffff, mask_ffff, mask_00ff); + u2_mask_and_pixUV = _mm_setr_epi16(mask_ffff, mask_ffff, mask_ffff, mask_00ff, mask_ffff, + mask_ffff, mask_ffff, mask_00ff); + + ASSERT((u4_width % 16) == 0); + + /***********************************************************/ + /* For Luma - */ + /* This code block calculates gpp value for luma by adding */ + /* the absolute difference between the current pixel and */ + /* its immediate right pixel with the absolute difference */ + /* between the current pixel and its immediate bottom */ + /* pixel and accumulating for every pixel in the frame.
*/ + /***********************************************************/ + for(i = 0; i < u4_height - 4; i += 4) + { + for(j = 0; j < u4_width - 16; j += 16) + { + u1_src_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j)); + u1_src_r1 = _mm_loadu_si128((__m128i *) (pu1_input_buf + i4_input_stride + j)); + u1_src_r2 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 2) + j)); + u1_src_r3 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 3) + j)); + u1_src_r4 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 4) + j)); + u1_src_right_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j + 1)); + u1_src_right_r1 = + _mm_loadu_si128((__m128i *) (pu1_input_buf + i4_input_stride + j + 1)); + u1_src_right_r2 = + _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 2) + j + 1)); + u1_src_right_r3 = + _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 3) + j + 1)); + + u2_sad_cur_bot_r01 = _mm_sad_epu8(u1_src_r0, u1_src_r1); + u2_sad_cur_bot_r12 = _mm_sad_epu8(u1_src_r1, u1_src_r2); + u2_sad_cur_bot_r23 = _mm_sad_epu8(u1_src_r2, u1_src_r3); + u2_sad_cur_bot_r34 = _mm_sad_epu8(u1_src_r3, u1_src_r4); + u2_sad_cur_right_r0 = _mm_sad_epu8(u1_src_r0, u1_src_right_r0); + u2_sad_cur_right_r1 = _mm_sad_epu8(u1_src_r1, u1_src_right_r1); + u2_sad_cur_right_r2 = _mm_sad_epu8(u1_src_r2, u1_src_right_r2); + u2_sad_cur_right_r3 = _mm_sad_epu8(u1_src_r3, u1_src_right_r3); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r12); + u2_sad_cur_bot_r23 = _mm_adds_epu16(u2_sad_cur_bot_r23, u2_sad_cur_bot_r34); + u2_sad_cur_right_r0 = _mm_adds_epu16(u2_sad_cur_right_r0, u2_sad_cur_right_r1); + u2_sad_cur_right_r2 = _mm_adds_epu16(u2_sad_cur_right_r2, u2_sad_cur_right_r3); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r23); + u2_sad_cur_right_r0 = _mm_adds_epu16(u2_sad_cur_right_r0, u2_sad_cur_right_r2); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, 
u2_sad_cur_right_r0); + + u2_sad_hadd = _mm_hadd_epi16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r01); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + + d_gpp_y += _mm_extract_epi16(u2_sad_hadd, 0); + } + + /************************************************************/ + /* Remaining width - */ + /* Since Last pixel is not getting processed, remaining 15 */ + /* pixels are getting processed separately by performing */ + /* and operations with u2_mask_and_pixY mask */ + /************************************************************/ + u1_src_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j)); + u1_src_r1 = _mm_loadu_si128((__m128i *) (pu1_input_buf + i4_input_stride + j)); + u1_src_r2 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 2) + j)); + u1_src_r3 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 3) + j)); + u1_src_r4 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 4) + j)); + u1_src_right_r0 = _mm_srli_si128(u1_src_r0, 1); + u1_src_right_r1 = _mm_srli_si128(u1_src_r1, 1); + u1_src_right_r2 = _mm_srli_si128(u1_src_r2, 1); + u1_src_right_r3 = _mm_srli_si128(u1_src_r3, 1); + + u1_src_r0 = _mm_and_si128(u1_src_r0, u2_mask_and_pixY); + u1_src_r1 = _mm_and_si128(u1_src_r1, u2_mask_and_pixY); + u1_src_r2 = _mm_and_si128(u1_src_r2, u2_mask_and_pixY); + u1_src_r3 = _mm_and_si128(u1_src_r3, u2_mask_and_pixY); + u1_src_r4 = _mm_and_si128(u1_src_r4, u2_mask_and_pixY); + + u2_sad_cur_bot_r01 = _mm_sad_epu8(u1_src_r0, u1_src_r1); + u2_sad_cur_bot_r12 = _mm_sad_epu8(u1_src_r1, u1_src_r2); + u2_sad_cur_bot_r23 = _mm_sad_epu8(u1_src_r2, u1_src_r3); + u2_sad_cur_bot_r34 = _mm_sad_epu8(u1_src_r3, u1_src_r4); + u2_sad_cur_right_r0 = _mm_sad_epu8(u1_src_r0, u1_src_right_r0); + u2_sad_cur_right_r1 = _mm_sad_epu8(u1_src_r1, u1_src_right_r1); + u2_sad_cur_right_r2 = _mm_sad_epu8(u1_src_r2, u1_src_right_r2); + u2_sad_cur_right_r3 = _mm_sad_epu8(u1_src_r3, 
u1_src_right_r3); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r12); + u2_sad_cur_bot_r23 = _mm_adds_epu16(u2_sad_cur_bot_r23, u2_sad_cur_bot_r34); + u2_sad_cur_right_r0 = _mm_adds_epu16(u2_sad_cur_right_r0, u2_sad_cur_right_r1); + u2_sad_cur_right_r2 = _mm_adds_epu16(u2_sad_cur_right_r2, u2_sad_cur_right_r3); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r23); + u2_sad_cur_right_r0 = _mm_adds_epu16(u2_sad_cur_right_r0, u2_sad_cur_right_r2); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_right_r0); + + u2_sad_hadd = _mm_hadd_epi16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r01); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + + d_gpp_y += _mm_extract_epi16(u2_sad_hadd, 0); + + pu1_input_buf += (i4_input_stride << 2); + } + + /* Loop for the remaining height */ + for(k = i; k < u4_height - 1; k++) + { + for(j = 0; j < u4_width - 16; j += 16) + { + u1_src_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j)); + u1_src_r1 = _mm_loadu_si128((__m128i *) (pu1_input_buf + i4_input_stride + j)); + u1_src_right_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j + 1)); + + u2_sad_cur_bot_r01 = _mm_sad_epu8(u1_src_r0, u1_src_r1); + u2_sad_cur_right_r0 = _mm_sad_epu8(u1_src_r0, u1_src_right_r0); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_right_r0); + + u2_sad_hadd = _mm_hadd_epi16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r01); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + + d_gpp_y += _mm_extract_epi16(u2_sad_hadd, 0); + } + + /************************************************************/ + /* Remaining width - */ + /* Since Last pixel is not getting processed, remaining 15 */ + /* pixels are getting processed separately by performing */ + /* and operations with u2_mask_and_pixY mask */ + 
/************************************************************/ + u1_src_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j)); + u1_src_r1 = _mm_loadu_si128((__m128i *) (pu1_input_buf + i4_input_stride + j)); + u1_src_right_r0 = _mm_srli_si128(u1_src_r0, 1); + + u1_src_r0 = _mm_and_si128(u1_src_r0, u2_mask_and_pixY); + u1_src_r1 = _mm_and_si128(u1_src_r1, u2_mask_and_pixY); + + u2_sad_cur_bot_r01 = _mm_sad_epu8(u1_src_r0, u1_src_r1); + u2_sad_cur_right_r0 = _mm_sad_epu8(u1_src_r0, u1_src_right_r0); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_right_r0); + + u2_sad_hadd = _mm_hadd_epi16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r01); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + + d_gpp_y += _mm_extract_epi16(u2_sad_hadd, 0); + + pu1_input_buf += (i4_input_stride); + } + + pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[1].pv_data; + i4_input_stride = ps_input_buf->as_component_bufs[1].i4_data_stride; + + /**************************************************************/ + /* For Chroma - */ + /* This code block first deinterleaves the Cb and Cr values */ + /* from the loaded registers, calculates gpp value for both */ + /* Cb and Cr separately by adding the absolute difference */ + /* between the current pixel and its immediate right pixel */ + /* with the absolute difference between the current pixel and */ + /* its immediate bottom pixel and accumulating for every */ + /* pixel in the frame.
*/ + /**************************************************************/ + for(i = 0; i < (u4_height / 2) - 4; i += 4) + { + for(j = 0; j < u4_width - 16; j += 16) + { + u1_src_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j)); + u1_src_r1 = _mm_loadu_si128((__m128i *) (pu1_input_buf + i4_input_stride + j)); + u1_src_r2 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 2) + j)); + u1_src_r3 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 3) + j)); + u1_src_r4 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 4) + j)); + u1_src_right_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j + 2)); + u1_src_right_r1 = + _mm_loadu_si128((__m128i *) (pu1_input_buf + i4_input_stride + j + 2)); + u1_src_right_r2 = + _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 2) + j + 2)); + u1_src_right_r3 = + _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 3) + j + 2)); + + /* separating u and v */ + u1_src_r0 = _mm_shuffle_epi8(u1_src_r0, u1_shuffle_chroma); + u1_src_r1 = _mm_shuffle_epi8(u1_src_r1, u1_shuffle_chroma); + u1_src_r2 = _mm_shuffle_epi8(u1_src_r2, u1_shuffle_chroma); + u1_src_r3 = _mm_shuffle_epi8(u1_src_r3, u1_shuffle_chroma); + u1_src_r4 = _mm_shuffle_epi8(u1_src_r4, u1_shuffle_chroma); + u1_src_right_r0 = _mm_shuffle_epi8(u1_src_right_r0, u1_shuffle_chroma); + u1_src_right_r1 = _mm_shuffle_epi8(u1_src_right_r1, u1_shuffle_chroma); + u1_src_right_r2 = _mm_shuffle_epi8(u1_src_right_r2, u1_shuffle_chroma); + u1_src_right_r3 = _mm_shuffle_epi8(u1_src_right_r3, u1_shuffle_chroma); + + u2_sad_cur_bot_r01 = _mm_sad_epu8(u1_src_r0, u1_src_r1); + u2_sad_cur_bot_r12 = _mm_sad_epu8(u1_src_r1, u1_src_r2); + u2_sad_cur_bot_r23 = _mm_sad_epu8(u1_src_r2, u1_src_r3); + u2_sad_cur_bot_r34 = _mm_sad_epu8(u1_src_r3, u1_src_r4); + u2_sad_cur_right_r0 = _mm_sad_epu8(u1_src_r0, u1_src_right_r0); + u2_sad_cur_right_r1 = _mm_sad_epu8(u1_src_r1, u1_src_right_r1); + u2_sad_cur_right_r2 = _mm_sad_epu8(u1_src_r2, 
u1_src_right_r2); + u2_sad_cur_right_r3 = _mm_sad_epu8(u1_src_r3, u1_src_right_r3); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r12); + u2_sad_cur_bot_r23 = _mm_adds_epu16(u2_sad_cur_bot_r23, u2_sad_cur_bot_r34); + u2_sad_cur_right_r0 = _mm_adds_epu16(u2_sad_cur_right_r0, u2_sad_cur_right_r1); + u2_sad_cur_right_r2 = _mm_adds_epu16(u2_sad_cur_right_r2, u2_sad_cur_right_r3); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r23); + u2_sad_cur_right_r0 = _mm_adds_epu16(u2_sad_cur_right_r0, u2_sad_cur_right_r2); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_right_r0); + + u2_sad_hadd = _mm_hadd_epi16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r01); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + + d_gpp_u += _mm_extract_epi16(u2_sad_hadd, 0); + d_gpp_v += _mm_extract_epi16(u2_sad_hadd, 1); + } + + /************************************************************/ + /* Remaining width - */ + /* Since Last pixel is not getting processed, remaining 15 */ + /* pixels are getting processed separately by performing */ + /* and operations with u2_mask_and_pixUV mask */ + /************************************************************/ + u1_src_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j)); + u1_src_r1 = _mm_loadu_si128((__m128i *) (pu1_input_buf + i4_input_stride + j)); + u1_src_r2 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 2) + j)); + u1_src_r3 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 3) + j)); + u1_src_r4 = _mm_loadu_si128((__m128i *) (pu1_input_buf + (i4_input_stride * 4) + j)); + u1_src_right_r0 = _mm_srli_si128(u1_src_r0, 2); + u1_src_right_r1 = _mm_srli_si128(u1_src_r1, 2); + u1_src_right_r2 = _mm_srli_si128(u1_src_r2, 2); + u1_src_right_r3 = _mm_srli_si128(u1_src_r3, 2); + + /* separating u and v */ + u1_src_r0 = _mm_shuffle_epi8(u1_src_r0, u1_shuffle_chroma); + u1_src_r1 = _mm_shuffle_epi8(u1_src_r1, u1_shuffle_chroma); + u1_src_r2 = 
_mm_shuffle_epi8(u1_src_r2, u1_shuffle_chroma); + u1_src_r3 = _mm_shuffle_epi8(u1_src_r3, u1_shuffle_chroma); + u1_src_r4 = _mm_shuffle_epi8(u1_src_r4, u1_shuffle_chroma); + u1_src_right_r0 = _mm_shuffle_epi8(u1_src_right_r0, u1_shuffle_chroma); + u1_src_right_r1 = _mm_shuffle_epi8(u1_src_right_r1, u1_shuffle_chroma); + u1_src_right_r2 = _mm_shuffle_epi8(u1_src_right_r2, u1_shuffle_chroma); + u1_src_right_r3 = _mm_shuffle_epi8(u1_src_right_r3, u1_shuffle_chroma); + + u1_src_r0 = _mm_and_si128(u1_src_r0, u2_mask_and_pixUV); + u1_src_r1 = _mm_and_si128(u1_src_r1, u2_mask_and_pixUV); + u1_src_r2 = _mm_and_si128(u1_src_r2, u2_mask_and_pixUV); + u1_src_r3 = _mm_and_si128(u1_src_r3, u2_mask_and_pixUV); + u1_src_r4 = _mm_and_si128(u1_src_r4, u2_mask_and_pixUV); + u1_src_right_r0 = _mm_and_si128(u1_src_right_r0, u2_mask_and_pixUV); + u1_src_right_r1 = _mm_and_si128(u1_src_right_r1, u2_mask_and_pixUV); + u1_src_right_r2 = _mm_and_si128(u1_src_right_r2, u2_mask_and_pixUV); + u1_src_right_r3 = _mm_and_si128(u1_src_right_r3, u2_mask_and_pixUV); + + u2_sad_cur_bot_r01 = _mm_sad_epu8(u1_src_r0, u1_src_r1); + u2_sad_cur_bot_r12 = _mm_sad_epu8(u1_src_r1, u1_src_r2); + u2_sad_cur_bot_r23 = _mm_sad_epu8(u1_src_r2, u1_src_r3); + u2_sad_cur_bot_r34 = _mm_sad_epu8(u1_src_r3, u1_src_r4); + u2_sad_cur_right_r0 = _mm_sad_epu8(u1_src_r0, u1_src_right_r0); + u2_sad_cur_right_r1 = _mm_sad_epu8(u1_src_r1, u1_src_right_r1); + u2_sad_cur_right_r2 = _mm_sad_epu8(u1_src_r2, u1_src_right_r2); + u2_sad_cur_right_r3 = _mm_sad_epu8(u1_src_r3, u1_src_right_r3); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r12); + u2_sad_cur_bot_r23 = _mm_adds_epu16(u2_sad_cur_bot_r23, u2_sad_cur_bot_r34); + u2_sad_cur_right_r0 = _mm_adds_epu16(u2_sad_cur_right_r0, u2_sad_cur_right_r1); + u2_sad_cur_right_r2 = _mm_adds_epu16(u2_sad_cur_right_r2, u2_sad_cur_right_r3); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r23); + u2_sad_cur_right_r0 = 
_mm_adds_epu16(u2_sad_cur_right_r0, u2_sad_cur_right_r2); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_right_r0); + + u2_sad_hadd = _mm_hadd_epi16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r01); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + + d_gpp_u += _mm_extract_epi16(u2_sad_hadd, 0); + d_gpp_v += _mm_extract_epi16(u2_sad_hadd, 1); + + pu1_input_buf += (i4_input_stride * 4); + } + + /* Loop for the remaining height */ + for(k = i; k < (u4_height / 2) - 1; k++) + { + for(j = 0; j < u4_width - 16; j += 16) + { + u1_src_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j)); + u1_src_r1 = _mm_loadu_si128((__m128i *) (pu1_input_buf + i4_input_stride + j)); + u1_src_right_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j + 2)); + + /* separating u and v */ + u1_src_r0 = _mm_shuffle_epi8(u1_src_r0, u1_shuffle_chroma); + u1_src_r1 = _mm_shuffle_epi8(u1_src_r1, u1_shuffle_chroma); + u1_src_right_r0 = _mm_shuffle_epi8(u1_src_right_r0, u1_shuffle_chroma); + + u2_sad_cur_bot_r01 = _mm_sad_epu8(u1_src_r0, u1_src_r1); + u2_sad_cur_right_r0 = _mm_sad_epu8(u1_src_r0, u1_src_right_r0); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_right_r0); + + u2_sad_hadd = _mm_hadd_epi16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r01); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + + d_gpp_u += _mm_extract_epi16(u2_sad_hadd, 0); + d_gpp_v += _mm_extract_epi16(u2_sad_hadd, 1); + } + + /************************************************************/ + /* Remaining width - */ + /* Since Last pixel is not getting processed, remaining 15 */ + /* pixels are getting processed separately by performing */ + /* and operations with u2_mask_and_pixUV mask */ + /************************************************************/ + u1_src_r0 = _mm_loadu_si128((__m128i *) (pu1_input_buf + j)); + u1_src_r1 = _mm_loadu_si128((__m128i *) (pu1_input_buf + i4_input_stride + j)); + u1_src_right_r0 = _mm_srli_si128(u1_src_r0, 2); + + /* separating u and v */ 
+ u1_src_r0 = _mm_shuffle_epi8(u1_src_r0, u1_shuffle_chroma); + u1_src_r1 = _mm_shuffle_epi8(u1_src_r1, u1_shuffle_chroma); + u1_src_right_r0 = _mm_shuffle_epi8(u1_src_right_r0, u1_shuffle_chroma); + + u1_src_r0 = _mm_and_si128(u1_src_r0, u2_mask_and_pixUV); + u1_src_r1 = _mm_and_si128(u1_src_r1, u2_mask_and_pixUV); + u1_src_right_r0 = _mm_and_si128(u1_src_right_r0, u2_mask_and_pixUV); + + u2_sad_cur_bot_r01 = _mm_sad_epu8(u1_src_r0, u1_src_r1); + u2_sad_cur_right_r0 = _mm_sad_epu8(u1_src_r0, u1_src_right_r0); + + u2_sad_cur_bot_r01 = _mm_adds_epu16(u2_sad_cur_bot_r01, u2_sad_cur_right_r0); + + u2_sad_hadd = _mm_hadd_epi16(u2_sad_cur_bot_r01, u2_sad_cur_bot_r01); + u2_sad_hadd = _mm_hadd_epi16(u2_sad_hadd, u2_sad_hadd); + + d_gpp_u += _mm_extract_epi16(u2_sad_hadd, 0); + d_gpp_v += _mm_extract_epi16(u2_sad_hadd, 1); + + pu1_input_buf += i4_input_stride; + } + + d_gpp_y /= (u4_width * u4_height); + d_gpp_u /= ((u4_width / 2) * (u4_height / 2)); + d_gpp_v /= ((u4_width / 2) * (u4_height / 2)); + + d_gpp = (DOUBLE) ((WT_LUMA_GPP * d_gpp_y) + d_gpp_u + d_gpp_v) / WT_TOTAL_GPP; + + return d_gpp; +} diff --git a/encoder/x86/svc/isvce_residual_pred_sse42.c b/encoder/x86/svc/isvce_residual_pred_sse42.c new file mode 100644 index 0000000..6b7fce7 --- /dev/null +++ b/encoder/x86/svc/isvce_residual_pred_sse42.c @@ -0,0 +1,735 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/** +******************************************************************************* +* +* @file +* isvce_residual_pred_sse42.c +* +* @brief +* Contains functions +* used for SVC residual +* prediction +* +******************************************************************************* +*/ +#include <immintrin.h> + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "isvc_structs.h" + +void isvce_luma_residual_sampler_2x_sse42(coordinates_t *ps_ref_array_positions, + coordinates_t *ps_ref_array_phases, + buffer_container_t *ps_inp, buffer_container_t *ps_out, + buffer_container_t *ps_scratch, UWORD32 u4_ref_nnz, + UWORD8 u1_ref_tx_size) +{ + WORD16 *pi2_inp_data = (WORD16 *) ps_inp->pv_data; + WORD16 *pi2_out_res = (WORD16 *) ps_out->pv_data; + WORD32 i4_inp_data_stride = ps_inp->i4_data_stride; + WORD32 i4_out_res_stride = ps_out->i4_data_stride; + WORD16 *pi2_refarray_buffer = (WORD16 *) ps_scratch->pv_data; + WORD32 i4_blk_ctr; + + UNUSED(ps_ref_array_positions); + UNUSED(ps_ref_array_phases); + + /* For 2x scaling, offsets always point to TL pixel outside MB */ + /* Hence, refTransBlkIdc will be different and since phase */ + /* for first refArray pos for horiz filtering samples > 8, */ + /* first row and first column from the refArray is never used */ + pi2_inp_data += 1 + i4_inp_data_stride; + + if((u1_ref_tx_size) && (0 != u4_ref_nnz)) + { + WORD16 *pi2_ref_data_byte; + WORD32 i4_i, i4_j; + WORD16 *pi2_refarray_buffer_tmp = pi2_refarray_buffer; + + __m128i i2_coeff_8x16b_r1_0, i2_coeff_8x16b_r1_1; + __m128i res_8x16b_r1_0, res_8x16b_r1_1; + __m128i final_res_8x16b_r1_0, final_res_8x16b_r1_1; + __m128i coeff_add_8x16b_r1; + __m128i coeff_add_8x16b_r2; + __m128i i2_coeff_8x16b_r2_0,
i2_coeff_8x16b_r2_1; + __m128i res_8x16b_r2_0, res_8x16b_r2_1; + __m128i final_res_8x16b_r2_0, final_res_8x16b_r2_1; + + pi2_ref_data_byte = pi2_inp_data; + + /* ----------- Horizontal Interpolation ---------------- */ + for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i += 2) + { + /* a0 a1 a2 a3 a4 a5 a6 a7 */ + i2_coeff_8x16b_r1_0 = _mm_loadu_si128((__m128i *) pi2_ref_data_byte); + /* b0 b1 b2 b3 b4 b5 b6 b7 */ + i2_coeff_8x16b_r2_0 = + _mm_loadu_si128((__m128i *) (pi2_ref_data_byte + i4_inp_data_stride)); + + /* a1 a2 a3 a4 a5 a6 a7 0 */ + i2_coeff_8x16b_r1_1 = _mm_srli_si128(i2_coeff_8x16b_r1_0, 2); + /* b1 b2 b3 b4 b5 b6 b7 0 */ + i2_coeff_8x16b_r2_1 = _mm_srli_si128(i2_coeff_8x16b_r2_0, 2); + + coeff_add_8x16b_r1 = _mm_add_epi16(i2_coeff_8x16b_r1_0, i2_coeff_8x16b_r1_1); + coeff_add_8x16b_r2 = _mm_add_epi16(i2_coeff_8x16b_r2_0, i2_coeff_8x16b_r2_1); + + i2_coeff_8x16b_r1_0 = _mm_slli_epi16(i2_coeff_8x16b_r1_0, 1); + i2_coeff_8x16b_r2_0 = _mm_slli_epi16(i2_coeff_8x16b_r2_0, 1); + + i2_coeff_8x16b_r1_1 = _mm_slli_epi16(i2_coeff_8x16b_r1_1, 1); + i2_coeff_8x16b_r2_1 = _mm_slli_epi16(i2_coeff_8x16b_r2_1, 1); + + res_8x16b_r1_0 = _mm_add_epi16(i2_coeff_8x16b_r1_0, coeff_add_8x16b_r1); + res_8x16b_r2_0 = _mm_add_epi16(i2_coeff_8x16b_r2_0, coeff_add_8x16b_r2); + + res_8x16b_r1_1 = _mm_add_epi16(i2_coeff_8x16b_r1_1, coeff_add_8x16b_r1); + res_8x16b_r2_1 = _mm_add_epi16(i2_coeff_8x16b_r2_1, coeff_add_8x16b_r2); + + final_res_8x16b_r1_0 = _mm_unpacklo_epi16(res_8x16b_r1_0, res_8x16b_r1_1); + final_res_8x16b_r2_0 = _mm_unpacklo_epi16(res_8x16b_r2_0, res_8x16b_r2_1); + + final_res_8x16b_r1_1 = _mm_unpackhi_epi16(res_8x16b_r1_0, res_8x16b_r1_1); + final_res_8x16b_r2_1 = _mm_unpackhi_epi16(res_8x16b_r2_0, res_8x16b_r2_1); + + _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 1), final_res_8x16b_r1_0); + _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 9), final_res_8x16b_r1_1); + + _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 17), final_res_8x16b_r2_0); + 
_mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 25), final_res_8x16b_r2_1); + + pi2_refarray_buffer[0] = (pi2_ref_data_byte[0] << 2); + pi2_refarray_buffer[15] = (pi2_ref_data_byte[7] << 2); + pi2_ref_data_byte += i4_inp_data_stride; + pi2_refarray_buffer[16] = (pi2_ref_data_byte[0] << 2); + pi2_refarray_buffer[31] = (pi2_ref_data_byte[7] << 2); + + /* vertical loop updates */ + pi2_ref_data_byte = pi2_inp_data + ((i4_i + 2) * i4_inp_data_stride); + pi2_refarray_buffer += 32; + } + + /* ----------- Vertical Interpolation ---------------- */ + pi2_refarray_buffer = pi2_refarray_buffer_tmp; + + { + __m128i i4_horz_samp_4x32b_r1_1, i4_horz_samp_4x32b_r1_2, i4_horz_samp_4x32b_r1_3, + i4_horz_samp_4x32b_r1_4; + __m128i i4_horz_samp_4x32b_r2_1, i4_horz_samp_4x32b_r2_2, i4_horz_samp_4x32b_r2_3, + i4_horz_samp_4x32b_r2_4; + __m128i i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2, i4_res_samp_4x32b_r1_3, + i4_res_samp_4x32b_r1_4; + __m128i i4_res_samp_4x32b_r2_1, i4_res_samp_4x32b_r2_2, i4_res_samp_4x32b_r2_3, + i4_res_samp_4x32b_r2_4; + __m128i horz_add_4x32b_r2_1, horz_add_4x32b_r2_2, horz_add_4x32b_r2_3, + horz_add_4x32b_r2_4; + + __m128i i4_horz_samp_8x16b_r1_1, i4_horz_samp_8x16b_r2_1; + __m128i i4_horz_samp_8x16b_r1_2, i4_horz_samp_8x16b_r2_2; + __m128i i4_horz_samp_8x16b_r1_3, i4_horz_samp_8x16b_r2_3; + __m128i i4_horz_samp_8x16b_r1_4, i4_horz_samp_8x16b_r2_4; + + __m128i twos = _mm_set1_epi32(2); + __m128i eights = _mm_set1_epi32(8); + + WORD16 *pi2_out; + pi2_out = pi2_out_res; + + i4_horz_samp_8x16b_r1_1 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer)); + i4_horz_samp_8x16b_r1_2 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 4)); + i4_horz_samp_8x16b_r1_3 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 8)); + i4_horz_samp_8x16b_r1_4 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 12)); + + i4_horz_samp_4x32b_r1_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_1); + i4_horz_samp_4x32b_r1_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_2); + 
i4_horz_samp_4x32b_r1_3 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_3); + i4_horz_samp_4x32b_r1_4 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_4); + + /* populate the first inter sample */ + i4_res_samp_4x32b_r1_1 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_1, twos), 2); + i4_res_samp_4x32b_r1_2 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_2, twos), 2); + i4_res_samp_4x32b_r1_3 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_3, twos), 2); + i4_res_samp_4x32b_r1_4 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_4, twos), 2); + + _mm_storeu_si128((__m128i *) pi2_out, + _mm_packs_epi32(i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2)); + _mm_storeu_si128((__m128i *) (pi2_out + 8), + _mm_packs_epi32(i4_res_samp_4x32b_r1_3, i4_res_samp_4x32b_r1_4)); + pi2_out += i4_out_res_stride; + + for(i4_j = 0; i4_j < 14; i4_j += 2) + { + pi2_refarray_buffer += MB_SIZE; + + i4_horz_samp_8x16b_r2_1 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer)); + i4_horz_samp_8x16b_r2_2 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 4)); + i4_horz_samp_8x16b_r2_3 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 8)); + i4_horz_samp_8x16b_r2_4 = _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 12)); + + i4_horz_samp_4x32b_r2_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_1); + i4_horz_samp_4x32b_r2_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_2); + i4_horz_samp_4x32b_r2_3 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_3); + i4_horz_samp_4x32b_r2_4 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_4); + + horz_add_4x32b_r2_1 = + _mm_add_epi32(i4_horz_samp_4x32b_r1_1, i4_horz_samp_4x32b_r2_1); + horz_add_4x32b_r2_2 = + _mm_add_epi32(i4_horz_samp_4x32b_r1_2, i4_horz_samp_4x32b_r2_2); + horz_add_4x32b_r2_3 = + _mm_add_epi32(i4_horz_samp_4x32b_r1_3, i4_horz_samp_4x32b_r2_3); + horz_add_4x32b_r2_4 = + _mm_add_epi32(i4_horz_samp_4x32b_r1_4, i4_horz_samp_4x32b_r2_4); + + i4_res_samp_4x32b_r1_1 = + _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r1_1, 1), 
horz_add_4x32b_r2_1); + i4_res_samp_4x32b_r1_2 = + _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r1_2, 1), horz_add_4x32b_r2_2); + i4_res_samp_4x32b_r1_3 = + _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r1_3, 1), horz_add_4x32b_r2_3); + i4_res_samp_4x32b_r1_4 = + _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r1_4, 1), horz_add_4x32b_r2_4); + + i4_res_samp_4x32b_r2_1 = + _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r2_1, 1), horz_add_4x32b_r2_1); + i4_res_samp_4x32b_r2_2 = + _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r2_2, 1), horz_add_4x32b_r2_2); + i4_res_samp_4x32b_r2_3 = + _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r2_3, 1), horz_add_4x32b_r2_3); + i4_res_samp_4x32b_r2_4 = + _mm_add_epi32(_mm_slli_epi32(i4_horz_samp_4x32b_r2_4, 1), horz_add_4x32b_r2_4); + + i4_res_samp_4x32b_r1_1 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_1, eights), 4); + i4_res_samp_4x32b_r1_2 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_2, eights), 4); + i4_res_samp_4x32b_r1_3 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_3, eights), 4); + i4_res_samp_4x32b_r1_4 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_4, eights), 4); + + i4_res_samp_4x32b_r2_1 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_1, eights), 4); + i4_res_samp_4x32b_r2_2 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_2, eights), 4); + i4_res_samp_4x32b_r2_3 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_3, eights), 4); + i4_res_samp_4x32b_r2_4 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_4, eights), 4); + + /* populate 2 samples based on current coeffs */ + _mm_storeu_si128((__m128i *) pi2_out, + _mm_packs_epi32(i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2)); + _mm_storeu_si128((__m128i *) (pi2_out + 8), + _mm_packs_epi32(i4_res_samp_4x32b_r1_3, i4_res_samp_4x32b_r1_4)); + pi2_out += i4_out_res_stride; + + _mm_storeu_si128((__m128i *) pi2_out, + _mm_packs_epi32(i4_res_samp_4x32b_r2_1, i4_res_samp_4x32b_r2_2)); + _mm_storeu_si128((__m128i *) 
(pi2_out + 8), + _mm_packs_epi32(i4_res_samp_4x32b_r2_3, i4_res_samp_4x32b_r2_4)); + pi2_out += i4_out_res_stride; + + /* store the coeff 2 to coeff 1 */ + /* (used in next iteration) */ + i4_horz_samp_4x32b_r1_1 = i4_horz_samp_4x32b_r2_1; + i4_horz_samp_4x32b_r1_2 = i4_horz_samp_4x32b_r2_2; + i4_horz_samp_4x32b_r1_3 = i4_horz_samp_4x32b_r2_3; + i4_horz_samp_4x32b_r1_4 = i4_horz_samp_4x32b_r2_4; + } + + i4_res_samp_4x32b_r1_1 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_1, twos), 2); + i4_res_samp_4x32b_r1_2 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_2, twos), 2); + i4_res_samp_4x32b_r1_3 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_3, twos), 2); + i4_res_samp_4x32b_r1_4 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r1_4, twos), 2); + + _mm_storeu_si128((__m128i *) pi2_out, + _mm_packs_epi32(i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2)); + _mm_storeu_si128((__m128i *) (pi2_out + 8), + _mm_packs_epi32(i4_res_samp_4x32b_r1_3, i4_res_samp_4x32b_r1_4)); + } + } + else + { + /* ----------------------------------------------------------------- */ + /* LOOP over number of blocks */ + /* ----------------------------------------------------------------- */ + for(i4_blk_ctr = 0; i4_blk_ctr < 4; i4_blk_ctr++) + { + /* if reference layer is not coded then no processing */ + if(0 != (u4_ref_nnz & 0x1)) + { + { + __m128i i2_coeff_8x16b_r1_0, i2_coeff_8x16b_r1_1; + __m128i i2_coeff_8x16b_r2_0, i2_coeff_8x16b_r2_1; + __m128i i2_coeff_8x16b_r3_0, i2_coeff_8x16b_r3_1; + __m128i i2_coeff_8x16b_r4_0, i2_coeff_8x16b_r4_1; + + __m128i res_8x16b_r1_0, res_8x16b_r1_1; + __m128i res_8x16b_r2_0, res_8x16b_r2_1; + __m128i res_8x16b_r3_0, res_8x16b_r3_1; + __m128i res_8x16b_r4_0, res_8x16b_r4_1; + __m128i final_res_8x16b_r1_0; + __m128i final_res_8x16b_r2_0; + __m128i final_res_8x16b_r3_0; + __m128i final_res_8x16b_r4_0; + + __m128i coeff_add_8x16b_r1; + __m128i coeff_add_8x16b_r2; + __m128i coeff_add_8x16b_r3; + __m128i coeff_add_8x16b_r4; + + /* 
----------- Horizontal Interpolation ---------------- */ + { + /* a0 a1 a2 a3 a4 a5 a6 a7 */ + i2_coeff_8x16b_r1_0 = _mm_loadu_si128((__m128i *) pi2_inp_data); + /* b0 b1 b2 b3 b4 b5 b6 b7 */ + i2_coeff_8x16b_r2_0 = + _mm_loadu_si128((__m128i *) (pi2_inp_data + i4_inp_data_stride)); + i2_coeff_8x16b_r3_0 = + _mm_loadu_si128((__m128i *) (pi2_inp_data + (i4_inp_data_stride << 1))); + i2_coeff_8x16b_r4_0 = + _mm_loadu_si128((__m128i *) (pi2_inp_data + (i4_inp_data_stride * 3))); + + /* a1 a2 a3 a4 a5 a6 a7 0 */ + i2_coeff_8x16b_r1_1 = _mm_srli_si128(i2_coeff_8x16b_r1_0, 2); + /* b1 b2 b3 b4 b5 b6 b7 0 */ + i2_coeff_8x16b_r2_1 = _mm_srli_si128(i2_coeff_8x16b_r2_0, 2); + i2_coeff_8x16b_r3_1 = _mm_srli_si128(i2_coeff_8x16b_r3_0, 2); + i2_coeff_8x16b_r4_1 = _mm_srli_si128(i2_coeff_8x16b_r4_0, 2); + + coeff_add_8x16b_r1 = + _mm_add_epi16(i2_coeff_8x16b_r1_0, i2_coeff_8x16b_r1_1); + coeff_add_8x16b_r2 = + _mm_add_epi16(i2_coeff_8x16b_r2_0, i2_coeff_8x16b_r2_1); + coeff_add_8x16b_r3 = + _mm_add_epi16(i2_coeff_8x16b_r3_0, i2_coeff_8x16b_r3_1); + coeff_add_8x16b_r4 = + _mm_add_epi16(i2_coeff_8x16b_r4_0, i2_coeff_8x16b_r4_1); + + i2_coeff_8x16b_r1_0 = _mm_slli_epi16(i2_coeff_8x16b_r1_0, 1); + i2_coeff_8x16b_r2_0 = _mm_slli_epi16(i2_coeff_8x16b_r2_0, 1); + i2_coeff_8x16b_r3_0 = _mm_slli_epi16(i2_coeff_8x16b_r3_0, 1); + i2_coeff_8x16b_r4_0 = _mm_slli_epi16(i2_coeff_8x16b_r4_0, 1); + + i2_coeff_8x16b_r1_1 = _mm_slli_epi16(i2_coeff_8x16b_r1_1, 1); + i2_coeff_8x16b_r2_1 = _mm_slli_epi16(i2_coeff_8x16b_r2_1, 1); + i2_coeff_8x16b_r3_1 = _mm_slli_epi16(i2_coeff_8x16b_r3_1, 1); + i2_coeff_8x16b_r4_1 = _mm_slli_epi16(i2_coeff_8x16b_r4_1, 1); + + res_8x16b_r1_0 = _mm_add_epi16(i2_coeff_8x16b_r1_0, coeff_add_8x16b_r1); + res_8x16b_r2_0 = _mm_add_epi16(i2_coeff_8x16b_r2_0, coeff_add_8x16b_r2); + res_8x16b_r3_0 = _mm_add_epi16(i2_coeff_8x16b_r3_0, coeff_add_8x16b_r3); + res_8x16b_r4_0 = _mm_add_epi16(i2_coeff_8x16b_r4_0, coeff_add_8x16b_r4); + + res_8x16b_r1_1 = 
_mm_add_epi16(i2_coeff_8x16b_r1_1, coeff_add_8x16b_r1); + res_8x16b_r2_1 = _mm_add_epi16(i2_coeff_8x16b_r2_1, coeff_add_8x16b_r2); + res_8x16b_r3_1 = _mm_add_epi16(i2_coeff_8x16b_r3_1, coeff_add_8x16b_r3); + res_8x16b_r4_1 = _mm_add_epi16(i2_coeff_8x16b_r4_1, coeff_add_8x16b_r4); + + final_res_8x16b_r1_0 = _mm_unpacklo_epi16(res_8x16b_r1_0, res_8x16b_r1_1); + final_res_8x16b_r2_0 = _mm_unpacklo_epi16(res_8x16b_r2_0, res_8x16b_r2_1); + final_res_8x16b_r3_0 = _mm_unpacklo_epi16(res_8x16b_r3_0, res_8x16b_r3_1); + final_res_8x16b_r4_0 = _mm_unpacklo_epi16(res_8x16b_r4_0, res_8x16b_r4_1); + + _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 1), + final_res_8x16b_r1_0); + _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 9), + final_res_8x16b_r2_0); + _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 17), + final_res_8x16b_r3_0); + _mm_storeu_si128((__m128i *) (pi2_refarray_buffer + 25), + final_res_8x16b_r4_0); + + pi2_refarray_buffer[0] = (pi2_inp_data[0] << 2); + pi2_refarray_buffer[7] = (pi2_inp_data[3] << 2); + pi2_refarray_buffer[8] = (pi2_inp_data[i4_inp_data_stride] << 2); + pi2_refarray_buffer[15] = (pi2_inp_data[i4_inp_data_stride + 3] << 2); + pi2_refarray_buffer[16] = (pi2_inp_data[(i4_inp_data_stride << 1)] << 2); + pi2_refarray_buffer[23] = + (pi2_inp_data[(i4_inp_data_stride << 1) + 3] << 2); + pi2_refarray_buffer[24] = (pi2_inp_data[(i4_inp_data_stride * 3)] << 2); + pi2_refarray_buffer[31] = (pi2_inp_data[(i4_inp_data_stride * 3) + 3] << 2); + } + + /* ----------- Vertical Interpolation ---------------- */ + { + __m128i i4_horz_samp_8x16b_r0_1, i4_horz_samp_8x16b_r0_2; + __m128i i4_horz_samp_8x16b_r1_1, i4_horz_samp_8x16b_r1_2; + __m128i i4_horz_samp_8x16b_r2_1, i4_horz_samp_8x16b_r2_2; + __m128i i4_horz_samp_8x16b_r3_1, i4_horz_samp_8x16b_r3_2; + + __m128i i4_horz_samp_4x32b_r0_1, i4_horz_samp_4x32b_r0_2; + __m128i i4_horz_samp_4x32b_r1_1, i4_horz_samp_4x32b_r1_2; + __m128i i4_horz_samp_4x32b_r2_1, i4_horz_samp_4x32b_r2_2; + __m128i 
i4_horz_samp_4x32b_r3_1, i4_horz_samp_4x32b_r3_2; + + __m128i i4_res_samp_4x32b_r0_1, i4_res_samp_4x32b_r0_2; + __m128i i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2; + __m128i i4_res_samp_4x32b_r2_1, i4_res_samp_4x32b_r2_2; + __m128i i4_res_samp_4x32b_r3_1, i4_res_samp_4x32b_r3_2; + __m128i i4_res_samp_4x32b_r4_1, i4_res_samp_4x32b_r4_2; + __m128i i4_res_samp_4x32b_r5_1, i4_res_samp_4x32b_r5_2; + __m128i i4_res_samp_4x32b_r6_1, i4_res_samp_4x32b_r6_2; + __m128i i4_res_samp_4x32b_r7_1, i4_res_samp_4x32b_r7_2; + + __m128i horz_add_4x32b_r1_1, horz_add_4x32b_r1_2; + __m128i horz_add_4x32b_r2_1, horz_add_4x32b_r2_2; + __m128i horz_add_4x32b_r3_1, horz_add_4x32b_r3_2; + + __m128i twos = _mm_set1_epi32(2); + __m128i eights = _mm_set1_epi32(8); + + i4_horz_samp_8x16b_r0_1 = + _mm_loadu_si128((__m128i *) (pi2_refarray_buffer)); + i4_horz_samp_8x16b_r0_2 = + _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + 4)); + i4_horz_samp_8x16b_r1_1 = + _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + BLK8x8SIZE)); + i4_horz_samp_8x16b_r1_2 = + _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + BLK8x8SIZE + 4)); + i4_horz_samp_8x16b_r2_1 = + _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + (BLK8x8SIZE << 1))); + i4_horz_samp_8x16b_r2_2 = _mm_loadu_si128( + (__m128i *) (pi2_refarray_buffer + (BLK8x8SIZE << 1) + 4)); + i4_horz_samp_8x16b_r3_1 = + _mm_loadu_si128((__m128i *) (pi2_refarray_buffer + (BLK8x8SIZE * 3))); + i4_horz_samp_8x16b_r3_2 = _mm_loadu_si128( + (__m128i *) (pi2_refarray_buffer + (BLK8x8SIZE * 3) + 4)); + + i4_horz_samp_4x32b_r0_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r0_1); + i4_horz_samp_4x32b_r0_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r0_2); + i4_horz_samp_4x32b_r1_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_1); + i4_horz_samp_4x32b_r1_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r1_2); + i4_horz_samp_4x32b_r2_1 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_1); + i4_horz_samp_4x32b_r2_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r2_2); + i4_horz_samp_4x32b_r3_1 = 
_mm_cvtepi16_epi32(i4_horz_samp_8x16b_r3_1); + i4_horz_samp_4x32b_r3_2 = _mm_cvtepi16_epi32(i4_horz_samp_8x16b_r3_2); + + horz_add_4x32b_r1_1 = + _mm_add_epi32(i4_horz_samp_4x32b_r0_1, i4_horz_samp_4x32b_r1_1); + horz_add_4x32b_r2_1 = + _mm_add_epi32(i4_horz_samp_4x32b_r1_1, i4_horz_samp_4x32b_r2_1); + horz_add_4x32b_r3_1 = + _mm_add_epi32(i4_horz_samp_4x32b_r2_1, i4_horz_samp_4x32b_r3_1); + + horz_add_4x32b_r1_2 = + _mm_add_epi32(i4_horz_samp_4x32b_r0_2, i4_horz_samp_4x32b_r1_2); + horz_add_4x32b_r2_2 = + _mm_add_epi32(i4_horz_samp_4x32b_r1_2, i4_horz_samp_4x32b_r2_2); + horz_add_4x32b_r3_2 = + _mm_add_epi32(i4_horz_samp_4x32b_r2_2, i4_horz_samp_4x32b_r3_2); + + i4_res_samp_4x32b_r1_1 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r0_1, 1), horz_add_4x32b_r1_1); + i4_res_samp_4x32b_r2_1 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r1_1, 1), horz_add_4x32b_r1_1); + i4_res_samp_4x32b_r3_1 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r1_1, 1), horz_add_4x32b_r2_1); + i4_res_samp_4x32b_r4_1 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r2_1, 1), horz_add_4x32b_r2_1); + i4_res_samp_4x32b_r5_1 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r2_1, 1), horz_add_4x32b_r3_1); + i4_res_samp_4x32b_r6_1 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r3_1, 1), horz_add_4x32b_r3_1); + + i4_res_samp_4x32b_r1_2 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r0_2, 1), horz_add_4x32b_r1_2); + i4_res_samp_4x32b_r2_2 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r1_2, 1), horz_add_4x32b_r1_2); + i4_res_samp_4x32b_r3_2 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r1_2, 1), horz_add_4x32b_r2_2); + i4_res_samp_4x32b_r4_2 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r2_2, 1), horz_add_4x32b_r2_2); + i4_res_samp_4x32b_r5_2 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r2_2, 1), horz_add_4x32b_r3_2); + i4_res_samp_4x32b_r6_2 = _mm_add_epi32( + _mm_slli_epi32(i4_horz_samp_4x32b_r3_2, 1), horz_add_4x32b_r3_2); + + 
i4_res_samp_4x32b_r0_1 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r0_1, twos), 2); + i4_res_samp_4x32b_r1_1 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_1, eights), 4); + i4_res_samp_4x32b_r2_1 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_1, eights), 4); + i4_res_samp_4x32b_r3_1 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r3_1, eights), 4); + i4_res_samp_4x32b_r4_1 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r4_1, eights), 4); + i4_res_samp_4x32b_r5_1 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r5_1, eights), 4); + i4_res_samp_4x32b_r6_1 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r6_1, eights), 4); + i4_res_samp_4x32b_r7_1 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r3_1, twos), 2); + + i4_res_samp_4x32b_r0_2 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r0_2, twos), 2); + i4_res_samp_4x32b_r1_2 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r1_2, eights), 4); + i4_res_samp_4x32b_r2_2 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r2_2, eights), 4); + i4_res_samp_4x32b_r3_2 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r3_2, eights), 4); + i4_res_samp_4x32b_r4_2 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r4_2, eights), 4); + i4_res_samp_4x32b_r5_2 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r5_2, eights), 4); + i4_res_samp_4x32b_r6_2 = + _mm_srai_epi32(_mm_add_epi32(i4_res_samp_4x32b_r6_2, eights), 4); + i4_res_samp_4x32b_r7_2 = + _mm_srai_epi32(_mm_add_epi32(i4_horz_samp_4x32b_r3_2, twos), 2); + + /* populate 2 samples based on current coeffs */ + _mm_storeu_si128( + (__m128i *) pi2_out_res, + _mm_packs_epi32(i4_res_samp_4x32b_r0_1, i4_res_samp_4x32b_r0_2)); + _mm_storeu_si128( + (__m128i *) (pi2_out_res + i4_out_res_stride), + _mm_packs_epi32(i4_res_samp_4x32b_r1_1, i4_res_samp_4x32b_r1_2)); + _mm_storeu_si128( + (__m128i *) (pi2_out_res + (i4_out_res_stride << 1)), + _mm_packs_epi32(i4_res_samp_4x32b_r2_1, i4_res_samp_4x32b_r2_2)); + _mm_storeu_si128( + (__m128i *) 
(pi2_out_res + (i4_out_res_stride * 3)), + _mm_packs_epi32(i4_res_samp_4x32b_r3_1, i4_res_samp_4x32b_r3_2)); + _mm_storeu_si128( + (__m128i *) (pi2_out_res + (i4_out_res_stride << 2)), + _mm_packs_epi32(i4_res_samp_4x32b_r4_1, i4_res_samp_4x32b_r4_2)); + _mm_storeu_si128( + (__m128i *) (pi2_out_res + (i4_out_res_stride * 5)), + _mm_packs_epi32(i4_res_samp_4x32b_r5_1, i4_res_samp_4x32b_r5_2)); + _mm_storeu_si128( + (__m128i *) (pi2_out_res + (i4_out_res_stride * 6)), + _mm_packs_epi32(i4_res_samp_4x32b_r6_1, i4_res_samp_4x32b_r6_2)); + _mm_storeu_si128( + (__m128i *) (pi2_out_res + (i4_out_res_stride * 7)), + _mm_packs_epi32(i4_res_samp_4x32b_r7_1, i4_res_samp_4x32b_r7_2)); + + pi2_out_res += BLK8x8SIZE; + } + } + } + else + { + pi2_out_res += BLK8x8SIZE; + } + + /* Block level loop updates */ + if(1 == i4_blk_ctr) + { + pi2_inp_data -= 4; + pi2_inp_data += (i4_inp_data_stride * 4); + pi2_out_res -= MB_SIZE; + pi2_out_res += (i4_out_res_stride * BLK8x8SIZE); + u4_ref_nnz >>= 2; + } + else + { + pi2_inp_data += 4; + } + + u4_ref_nnz >>= 1; + + } /* end of loop over all the blocks */ + } +} + +UWORD32 isvce_get_sad_with_residual_pred_sse42(buffer_container_t *ps_src, + buffer_container_t *ps_pred, + buffer_container_t *ps_res, UWORD32 u4_mb_wd, + UWORD32 u4_mb_ht) +{ + UWORD32 i, j, u4_sad = 0; + UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data; + UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data; + WORD16 *pi2_res = (WORD16 *) ps_res->pv_data; + WORD32 i4_src_stride = ps_src->i4_data_stride; + WORD32 i4_pred_stride = ps_pred->i4_data_stride; + WORD32 i4_res_stride = ps_res->i4_data_stride; + UWORD32 u4_num_rows_per_loop = 8; + UWORD32 u4_ht_by_8 = u4_mb_ht / u4_num_rows_per_loop; + + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i src_r4, src_r5, src_r6, src_r7; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i pred_r4, pred_r5, pred_r6, pred_r7; + __m128i res_r0, res_r1, res_r2, res_r3; + __m128i res_r4, res_r5, res_r6, res_r7; + __m128i zero_4x32 = 
_mm_set1_epi32((WORD32) 0); + + if((u4_mb_wd == 16) && (u4_mb_ht % 8 == 0)) + { + for(i = 0; i < u4_ht_by_8; i++) + { + for(j = 0; j < 2; j++) + { + src_r0 = _mm_loadl_epi64((__m128i *) (pu1_src)); + src_r1 = _mm_loadl_epi64((__m128i *) (pu1_src + 8)); + + pu1_src += i4_src_stride; + + src_r2 = _mm_loadl_epi64((__m128i *) (pu1_src)); + src_r3 = _mm_loadl_epi64((__m128i *) (pu1_src + 8)); + + pu1_src += i4_src_stride; + + src_r4 = _mm_loadl_epi64((__m128i *) (pu1_src)); + src_r5 = _mm_loadl_epi64((__m128i *) (pu1_src + 8)); + + pu1_src += i4_src_stride; + + src_r6 = _mm_loadl_epi64((__m128i *) (pu1_src)); + src_r7 = _mm_loadl_epi64((__m128i *) (pu1_src + 8)); + + pu1_src += i4_src_stride; + + pred_r0 = _mm_loadl_epi64((__m128i *) (pu1_pred)); + pred_r1 = _mm_loadl_epi64((__m128i *) (pu1_pred + 8)); + + pu1_pred += i4_pred_stride; + + pred_r2 = _mm_loadl_epi64((__m128i *) (pu1_pred)); + pred_r3 = _mm_loadl_epi64((__m128i *) (pu1_pred + 8)); + + pu1_pred += i4_pred_stride; + + pred_r4 = _mm_loadl_epi64((__m128i *) (pu1_pred)); + pred_r5 = _mm_loadl_epi64((__m128i *) (pu1_pred + 8)); + + pu1_pred += i4_pred_stride; + + pred_r6 = _mm_loadl_epi64((__m128i *) (pu1_pred)); + pred_r7 = _mm_loadl_epi64((__m128i *) (pu1_pred + 8)); + + pu1_pred += i4_pred_stride; + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r1 = _mm_cvtepu8_epi16(src_r1); + src_r2 = _mm_cvtepu8_epi16(src_r2); + src_r3 = _mm_cvtepu8_epi16(src_r3); + src_r4 = _mm_cvtepu8_epi16(src_r4); + src_r5 = _mm_cvtepu8_epi16(src_r5); + src_r6 = _mm_cvtepu8_epi16(src_r6); + src_r7 = _mm_cvtepu8_epi16(src_r7); + + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + pred_r2 = _mm_cvtepu8_epi16(pred_r2); + pred_r3 = _mm_cvtepu8_epi16(pred_r3); + pred_r4 = _mm_cvtepu8_epi16(pred_r4); + pred_r5 = _mm_cvtepu8_epi16(pred_r5); + pred_r6 = _mm_cvtepu8_epi16(pred_r6); + pred_r7 = _mm_cvtepu8_epi16(pred_r7); + + res_r0 = _mm_loadu_si128((__m128i *) (pi2_res)); + res_r1 = _mm_loadu_si128((__m128i *) 
(pi2_res + 8)); + + pi2_res += i4_res_stride; + + res_r2 = _mm_loadu_si128((__m128i *) (pi2_res)); + res_r3 = _mm_loadu_si128((__m128i *) (pi2_res + 8)); + + pi2_res += i4_res_stride; + + res_r4 = _mm_loadu_si128((__m128i *) (pi2_res)); + res_r5 = _mm_loadu_si128((__m128i *) (pi2_res + 8)); + + pi2_res += i4_res_stride; + + res_r6 = _mm_loadu_si128((__m128i *) (pi2_res)); + res_r7 = _mm_loadu_si128((__m128i *) (pi2_res + 8)); + + pi2_res += i4_res_stride; + + src_r0 = _mm_sub_epi16(src_r0, pred_r0); + src_r1 = _mm_sub_epi16(src_r1, pred_r1); + src_r2 = _mm_sub_epi16(src_r2, pred_r2); + src_r3 = _mm_sub_epi16(src_r3, pred_r3); + src_r4 = _mm_sub_epi16(src_r4, pred_r4); + src_r5 = _mm_sub_epi16(src_r5, pred_r5); + src_r6 = _mm_sub_epi16(src_r6, pred_r6); + src_r7 = _mm_sub_epi16(src_r7, pred_r7); + + src_r0 = _mm_sub_epi16(src_r0, res_r0); + src_r1 = _mm_sub_epi16(src_r1, res_r1); + src_r2 = _mm_sub_epi16(src_r2, res_r2); + src_r3 = _mm_sub_epi16(src_r3, res_r3); + src_r4 = _mm_sub_epi16(src_r4, res_r4); + src_r5 = _mm_sub_epi16(src_r5, res_r5); + src_r6 = _mm_sub_epi16(src_r6, res_r6); + src_r7 = _mm_sub_epi16(src_r7, res_r7); + + src_r0 = _mm_abs_epi16(src_r0); + src_r1 = _mm_abs_epi16(src_r1); + src_r2 = _mm_abs_epi16(src_r2); + src_r3 = _mm_abs_epi16(src_r3); + src_r4 = _mm_abs_epi16(src_r4); + src_r5 = _mm_abs_epi16(src_r5); + src_r6 = _mm_abs_epi16(src_r6); + src_r7 = _mm_abs_epi16(src_r7); + + src_r0 = _mm_adds_epu16(src_r0, src_r1); + src_r1 = _mm_adds_epu16(src_r2, src_r3); + src_r2 = _mm_adds_epu16(src_r4, src_r5); + src_r3 = _mm_adds_epu16(src_r6, src_r7); + + src_r0 = _mm_adds_epu16(src_r0, src_r1); + src_r1 = _mm_adds_epu16(src_r2, src_r3); + + src_r0 = _mm_adds_epu16(src_r0, src_r1); + + src_r1 = _mm_cvtepu16_epi32(src_r0); + src_r2 = _mm_srli_si128(src_r0, 8); + src_r2 = _mm_cvtepu16_epi32(src_r2); + + src_r0 = _mm_hadd_epi32(src_r1, src_r2); + src_r0 = _mm_hadd_epi32(src_r0, zero_4x32); + src_r0 = _mm_hadd_epi32(src_r0, zero_4x32); + + u4_sad += 
_mm_extract_epi32(src_r0, 0); + } + } + } + else + { + for(i = 0; i < u4_mb_ht; i++) + { + for(j = 0; j < u4_mb_wd; j++) + { + WORD16 i2_src = pu1_src[j + i * i4_src_stride]; + WORD16 i2_pred = pu1_pred[j + i * i4_pred_stride]; + WORD16 i2_res = pi2_res[j + i * i4_res_stride]; + u4_sad += ABS(i2_src - i2_pred - i2_res); + } + } + } + + return u4_sad; +} diff --git a/fuzzer/Android.bp b/fuzzer/Android.bp index e79e31e..ab716e5 100644 --- a/fuzzer/Android.bp +++ b/fuzzer/Android.bp @@ -24,6 +24,22 @@ cc_defaults { }, } +cc_defaults { + name: "libavc_enc_fuzzer_defaults", + host_supported: true, + static_libs: ["liblog"], + cflags: [ + "-Wall", + "-Werror", + ], + fuzz_config: { + cc: [ + "android-media-fuzzing-reports@google.com", + ], + componentid: 155276, + }, +} + cc_fuzz { name: "avc_dec_fuzzer", defaults: ["libavc_mvc_fuzzer_defaults"], @@ -48,22 +64,22 @@ cc_fuzz { cc_fuzz { name: "avc_enc_fuzzer", - host_supported: true, + defaults: ["libavc_enc_fuzzer_defaults"], srcs: [ "avc_enc_fuzzer.cpp", ], static_libs: [ - "libavcenc", - "liblog", + "libavcenc" ], - cflags: [ - "-Wall", - "-Werror", - ], - fuzz_config: { - cc: [ - "android-media-fuzzing-reports@google.com", - ], - componentid: 155276, - }, } + +cc_fuzz { + name: "svc_enc_fuzzer", + defaults: ["libavc_enc_fuzzer_defaults"], + srcs: [ + "svc_enc_fuzzer.cpp", + ], + static_libs: [ + "libsvcenc" + ], +} \ No newline at end of file diff --git a/fuzzer/svc_enc_fuzzer.cmake b/fuzzer/svc_enc_fuzzer.cmake new file mode 100644 index 0000000..527effa --- /dev/null +++ b/fuzzer/svc_enc_fuzzer.cmake @@ -0,0 +1,2 @@ +libavc_add_fuzzer(svc_enc_fuzzer libsvcenc SOURCES + ${AVC_ROOT}/fuzzer/svc_enc_fuzzer.cpp) diff --git a/fuzzer/svc_enc_fuzzer.cpp b/fuzzer/svc_enc_fuzzer.cpp new file mode 100644 index 0000000..d9f26b9 --- /dev/null +++ b/fuzzer/svc_enc_fuzzer.cpp @@ -0,0 +1,1343 @@ +/****************************************************************************** + * + * Copyright (C) 2020 The Android Open Source 
Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +#include +#include +#include +#include +#include +#include +#include + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvce.h" + +constexpr WORD32 kMeSpeedPreset[] = {100}; +constexpr WORD32 kDeblkLevel[] = {0, 2, 3, 4}; +constexpr IVE_AIR_MODE_T kAirMode[] = {IVE_AIR_MODE_NONE}; +constexpr IVE_SPEED_CONFIG kEncSpeed[] = {IVE_CONFIG, IVE_SLOWEST, IVE_NORMAL, + IVE_FAST, IVE_HIGH_SPEED, IVE_FASTEST}; +constexpr IV_PROFILE_T kProfile[] = {IV_PROFILE_BASE, IV_PROFILE_MAIN}; +constexpr IVE_RC_MODE_T kRCMode[] = {IVE_RC_NONE, IVE_RC_STORAGE, IVE_RC_CBR_NON_LOW_DELAY, + IVE_RC_CBR_LOW_DELAY}; +constexpr IV_COLOR_FORMAT_T kSupportedColorFormats[] = {IV_YUV_420P}; +constexpr WORD32 kSupportedLevels[] = {10, 9, 11, 12, 13, 20, 21, 22, + 30, 31, 32, 40, 41, 42, 50, 51}; +constexpr IVE_SLICE_MODE_T kSliceMode[] = {IVE_SLICE_MODE_NONE}; +constexpr IV_ARCH_T kArchs[] = { + ARCH_ARM_NONEON, ARCH_ARM_A9Q, ARCH_ARM_A9A, ARCH_ARM_A9, ARCH_ARM_A7, + ARCH_ARM_A5, ARCH_ARM_A15, ARCH_ARM_NEONINTR, ARCH_X86_GENERIC, ARCH_X86_SSSE3, + ARCH_X86_SSE42, ARCH_ARM_A53, ARCH_ARM_A57, ARCH_ARM_V8_NEON}; +constexpr DOUBLE kSpatialResRatio[] = {1.5, 2}; +constexpr UWORD8 kSpatialLayers[] = {1, 2, 3}; +constexpr UWORD8 kTemporalLayers[] = 
{1, 2, 3}; +constexpr size_t kAirModeNum = std::size(kAirMode); +constexpr size_t kEncSpeedNum = std::size(kEncSpeed); +constexpr size_t kMeSpeedPresetNum = std::size(kMeSpeedPreset); +constexpr size_t kDeblkLevelNum = std::size(kDeblkLevel); +constexpr size_t kProfileNum = std::size(kProfile); +constexpr size_t kRCModeNum = std::size(kRCMode); +constexpr size_t kSupportedColorFormatsNum = std::size(kSupportedColorFormats); +constexpr size_t kSupportedLevelsNum = std::size(kSupportedLevels); +constexpr size_t kSliceModeNum = std::size(kSliceMode); +constexpr size_t kSpatialResRatioNum = std::size(kSpatialResRatio); +constexpr size_t kSpatialLayersNum = std::size(kSpatialLayers); +constexpr size_t kTemporalLayersNum = std::size(kTemporalLayers); +constexpr size_t kMinQP = 0; +constexpr size_t kMaxQP = 51; +constexpr size_t kMaxWidth = 2560; +constexpr size_t kMaxHeight = 2560; +constexpr size_t kMaxBitrate = 500000000; +constexpr UWORD8 kNumSeiMdcvPrimaries = 3; +constexpr UWORD8 kNumSeiCcvPrimaries = 3; +constexpr double kSvcCompliantDimProb = 0.75; +constexpr size_t kMaxEncodeCalls = 100; + +typedef enum ARG_INDICES_T +{ + IDX_WD_BYTE_1, + IDX_WD_BYTE_2, + IDX_HT_BYTE_1, + IDX_HT_BYTE_2, + IDX_COLOR_FORMAT, + IDX_ARCH_TYPE, + IDX_RC_MODE, + IDX_NUM_CORES, + IDX_NUM_ARCH, + IDX_NUM_B_FRAMES, + IDX_ENC_SPEED, + IDX_CONSTRAINED_INTRA_FLAG, + IDX_INTRA_4x4, + IDX_I_FRAME_QP, + IDX_P_FRAME_QP, + IDX_B_FRAME_QP, + IDX_BITRATE_BYTE_1, + IDX_BITRATE_BYTE_2, + IDX_FRAME_RATE, + IDX_INTRA_REFRESH, + IDX_ENABLE_HALF_PEL, + IDX_ENABLE_Q_PEL, + IDX_ME_SPEED_PRESET, + IDX_AIR_MODE, + IDX_DISABLE_DEBLOCK_LEVEL, + IDX_SEARCH_RANGE_X, + IDX_SEARCH_RANGE_Y, + IDX_I_INTERVAL, + IDX_IDR_INTERVAL, + IDX_SEI_MDCV_FLAG, + IDX_SEI_CLL_FLAG, + IDX_SEI_AVE_FLAG, + IDX_SEI_CCV_FLAG, + IDX_PROFILE, + IDX_ASPECT_RATIO_FLAG, + IDX_NAL_HRD_FLAG, + IDX_VCL_HRD_FLAG, + IDX_ENABLE_FORCE_IDR, + IDX_ENABLE_DYNAMIC_BITRATE, + IDX_ENABLE_DYNAMIC_FRAME_RATE, + IDX_FORCE_IDR_INTERVAL, + 
IDX_DYNAMIC_BITRATE_INTERVAL, + IDX_DYNAMIC_FRAME_RATE_INTERVAL, + IDX_ENC_LEVEL, + IDX_RECON_FMT, + IDX_SLICE_MODE, + IDX_ENABLE_FAST_SAD, + IDX_NUM_SPATIAL_LAYERS, + IDX_NUM_TEMPORAL_LAYERS, + IDX_SPATIAL_RES_RATIO, + IDX_SVC_COMPLIANT_DIMS, + IDX_ENABLE_RECON, + IDX_ENABLE_NALU_INFO_EXPORT, + IDX_LAST +} ARG_INDICES_T; + +class Codec +{ + public: + struct FrameDims + { + size_t mWidth; + size_t mHeight; + + FrameDims(size_t w, size_t h) : mWidth(w), mHeight(h) {} + FrameDims(const std::pair &dimPair) + : FrameDims(dimPair.first, dimPair.second) + { + } + FrameDims(const FrameDims &other) : FrameDims(other.mWidth, other.mHeight) {} + + void operator=(const FrameDims &other) + { + mWidth = other.mWidth; + mHeight = other.mHeight; + } + + size_t getFrameSize() const { return (mWidth * mHeight * 3) / 2; }; + }; + + struct EncBufs + { + std::vector mInputBuf; + std::vector mOutputBuf; + std::vector mReconBuf; + std::vector mNaluInfoStructBuf; + std::vector> mNaluInfoDataBuf; + }; + + Codec() + : mCodecCtx(nullptr), + mMemRecords(), + mMemRecBufs(), + mEncBufs(), + mAirMode(IVE_AIR_MODE_NONE), + mEncSpeed(IVE_NORMAL), + mRCMode(IVE_RC_NONE), + mArch(ARCH_NA), + mSliceMode(IVE_SLICE_MODE_NONE), + mIvVideoColorFormat(IV_YUV_420P), + mProfile(IV_PROFILE_BASE), + mSvcCompDims{kMaxWidth, kMaxHeight}, + mInputDims{kMaxWidth, kMaxHeight}, + mHalfPelEnable(1), + mQPelEnable(1), + mIntra4x4(0), + mEnableFastSad(0), + mEnableAltRef(0), + mConstrainedIntraFlag(0), + mSeiCllFlag(1), + mSeiAveFlag(1), + mSeiCcvFlag(1), + mSeiMdcvFlag(1), + mAspectRatioFlag(0), + mNalHrdFlag(0), + mVclHrdFlag(0), + mIsForceIdrEnabled(false), + mIsDynamicBitRateChangeEnabled(false), + mIsDynamicFrameRateChangeEnabled(false), + mEnableRecon(false), + mEnableNaluInfoExport(false), + mAvcEncLevel(41), + mNumMemRecords(0), + mNumCores(1), + mBframes(0), + mSliceParam(256), + mMeSpeedPreset(100), + mIInterval(60), + mIDRInterval(60), + mDisableDeblockLevel(0), + m_I_QP(22), + m_P_QP(28), + m_B_QP(22), + 
mIntraRefresh(30), + mSearchRangeX(64), + mSearchRangeY(48), + mForceIdrInterval(0), + mDynamicBitRateInterval(0), + mDynamicFrameRateInterval(0), + mBitrate(6000000), + mFrameRate(30), + mNumSpatialLayers(1), + mNumTemporalLayers(1), + mSpatialResRatio(2) + { + } + + ~Codec() { delMemRecs(); }; + + bool initEncoder(const UWORD8 *data); + bool encodeFrames(const UWORD8 *data, size_t size); + + private: + void setEncParams(iv_raw_buf_t *psInpRawBuf, std::vector &buf, const FrameDims &dims, + IV_COLOR_FORMAT_T colorFormat = IV_YUV_420P); + void setFrameType(IV_PICTURE_CODING_TYPE_T eFrameType); + void setQp(); + void setEncMode(IVE_ENC_MODE_T eEncMode); + void setDimensions(); + void setNumCores(); + void setFrameRate(); + void setIpeParams(); + void setBitRate(); + void setAirParams(); + void setMeParams(); + void setGopParams(); + void setProfileParams(); + void setDeblockParams(); + void setVbvParams(); + void setDefault(); + void setVuiParams(); + void getBufInfo(); + void setSeiMdcvParams(); + void setSeiCllParams(); + void setSeiAveParams(); + void setSeiCcvParams(); + void logVersion(); + void initEncBufs(); + bool initMemRecs(); + void delMemRecs(); + + iv_obj_t *mCodecCtx; + std::vector mMemRecords; + std::vector mMemRecBufs; + EncBufs mEncBufs; + + IVE_AIR_MODE_T mAirMode; + IVE_SPEED_CONFIG mEncSpeed; + IVE_RC_MODE_T mRCMode; + IV_ARCH_T mArch; + IVE_SLICE_MODE_T mSliceMode; + IV_COLOR_FORMAT_T mIvVideoColorFormat; + IV_PROFILE_T mProfile; + FrameDims mSvcCompDims; + FrameDims mInputDims; + + bool mHalfPelEnable; + bool mQPelEnable; + bool mIntra4x4; + bool mEnableFastSad; + bool mEnableAltRef; + bool mConstrainedIntraFlag; + bool mSeiCllFlag; + bool mSeiAveFlag; + bool mSeiCcvFlag; + bool mSeiMdcvFlag; + bool mAspectRatioFlag; + bool mNalHrdFlag; + bool mVclHrdFlag; + bool mIsForceIdrEnabled; + bool mIsDynamicBitRateChangeEnabled; + bool mIsDynamicFrameRateChangeEnabled; + bool mEnableRecon; + bool mEnableNaluInfoExport; + UWORD32 mAvcEncLevel; + UWORD32 
mNumMemRecords; + UWORD32 mNumCores; + UWORD32 mBframes; + UWORD32 mSliceParam; + UWORD32 mMeSpeedPreset; + UWORD32 mIInterval; + UWORD32 mIDRInterval; + UWORD32 mDisableDeblockLevel; + UWORD32 m_I_QP; + UWORD32 m_P_QP; + UWORD32 m_B_QP; + UWORD32 mIntraRefresh; + UWORD32 mSearchRangeX; + UWORD32 mSearchRangeY; + /* Units - number of frames */ + UWORD32 mForceIdrInterval; + /* Units - number of frames */ + UWORD32 mDynamicBitRateInterval; + /* Units - number of frames */ + UWORD32 mDynamicFrameRateInterval; + UWORD64 mBitrate; + DOUBLE mFrameRate; + UWORD8 mNumSpatialLayers; + UWORD8 mNumTemporalLayers; + DOUBLE mSpatialResRatio; +}; + +void Codec::initEncBufs() +{ + size_t frameSize = mInputDims.getFrameSize(); + constexpr size_t minOutBufSize = 0x800; + size_t outBufSize = std::max(minOutBufSize, frameSize * mNumSpatialLayers); + size_t naluInfoBufSize = 460 * mNumSpatialLayers; + + mEncBufs.mInputBuf.resize(frameSize); + mEncBufs.mOutputBuf.resize(outBufSize); + + if(mEnableRecon) + { + mEncBufs.mReconBuf.resize(frameSize); + } + + if(mEnableNaluInfoExport) + { + mEncBufs.mNaluInfoStructBuf.resize(mNumSpatialLayers * 2); + mEncBufs.mNaluInfoDataBuf.resize(mNumSpatialLayers); + + for(auto i = 0; i < mNumSpatialLayers; i++) + { + mEncBufs.mNaluInfoDataBuf[i].resize(naluInfoBufSize); + } + } +} + +bool Codec::initMemRecs() +{ + std::fill(mMemRecBufs.begin(), mMemRecBufs.end(), nullptr); + + for(auto i = 0u; i < mNumMemRecords; i++) + { + mMemRecBufs[i] = reinterpret_cast( + aligned_alloc(mMemRecords[i].u4_mem_alignment, mMemRecords[i].u4_mem_size)); + mMemRecords[i].pv_base = mMemRecBufs[i]; + + if(nullptr == mMemRecBufs[i]) + { + /* Allocation failed: release the buffers obtained so far through */ + /* delMemRecs(), which also resets every slot to nullptr. Freeing them */ + /* without clearing the slots lets ~Codec() -> delMemRecs() free them twice. */ + delMemRecs(); + + return false; + } + } + + return true; +} + +void Codec::delMemRecs() +{ + for(auto i = 0u; i < mNumMemRecords; i++) + { + if(mMemRecBufs[i]) + { + free(mMemRecBufs[i]); + } + } + + std::fill(mMemRecBufs.begin(), mMemRecBufs.end(), nullptr); +} + +bool
Codec::initEncoder(const UWORD8 *data) +{ + mInputDims = FrameDims{((data[IDX_WD_BYTE_1] << 8) | data[IDX_WD_BYTE_2]) % kMaxWidth, + ((data[IDX_HT_BYTE_1] << 8) | data[IDX_HT_BYTE_2]) % kMaxHeight}; + + mNumSpatialLayers = kSpatialLayers[data[IDX_NUM_SPATIAL_LAYERS] % kSpatialLayersNum]; + mNumTemporalLayers = kTemporalLayers[data[IDX_NUM_TEMPORAL_LAYERS] % kTemporalLayersNum]; + mSpatialResRatio = kSpatialResRatio[data[IDX_SPATIAL_RES_RATIO] % kSpatialResRatioNum]; + bool useSvcCompliantDims = + data[IDX_SVC_COMPLIANT_DIMS] < + static_cast(std::numeric_limits::max() * kSvcCompliantDimProb); + + if(useSvcCompliantDims) + { + auto getSvcCompliantDims = [&]() -> FrameDims + { + auto maxResRatio = pow(mSpatialResRatio, mNumSpatialLayers - 1); + UWORD32 dimPadding = 0; + UWORD32 numDecimalDigits = mNumSpatialLayers; + constexpr auto minDimGcd = 16; + UWORD32 decPtDelMultiplier = static_cast(std::pow(10, numDecimalDigits)); + FrameDims dims{mInputDims}; + + if(std::fmod(minDimGcd, maxResRatio)) + { + dimPadding = std::lcm(minDimGcd * decPtDelMultiplier, + static_cast(maxResRatio * decPtDelMultiplier)) / + decPtDelMultiplier; + } + else + { + dimPadding = static_cast(minDimGcd * maxResRatio); + } + + if(mInputDims.mWidth % dimPadding) + { + dims.mWidth = mInputDims.mWidth - ((mInputDims.mWidth) % dimPadding) + dimPadding; + } + + if(mInputDims.mHeight % dimPadding) + { + dims.mHeight = + mInputDims.mHeight - ((mInputDims.mHeight) % dimPadding) + dimPadding; + } + + return dims; + }; + + mSvcCompDims = getSvcCompliantDims(); + mInputDims = mSvcCompDims; + } + + mIvVideoColorFormat = + kSupportedColorFormats[data[IDX_COLOR_FORMAT] % kSupportedColorFormatsNum]; + mArch = kArchs[data[IDX_ARCH_TYPE] % std::size(kArchs)]; + mRCMode = kRCMode[data[IDX_RC_MODE] % kRCModeNum]; + mNumCores = (data[IDX_NUM_CORES] & 0x07) + 1; + mBframes = 0; + + mEncSpeed = kEncSpeed[data[IDX_ENC_SPEED] % kEncSpeedNum]; + mConstrainedIntraFlag = data[IDX_CONSTRAINED_INTRA_FLAG] & 0x01; + mIntra4x4 
= data[IDX_INTRA_4x4] & 0x01; + m_I_QP = data[IDX_I_FRAME_QP]; + m_P_QP = data[IDX_P_FRAME_QP]; + m_B_QP = data[IDX_B_FRAME_QP]; + mBitrate = (((data[IDX_BITRATE_BYTE_1] << 8) | data[IDX_BITRATE_BYTE_2]) * 1000) % kMaxBitrate; + mFrameRate = data[IDX_FRAME_RATE] % 120; + mIntraRefresh = data[IDX_INTRA_REFRESH] + 1; + mHalfPelEnable = data[IDX_ENABLE_HALF_PEL] & 0x01; + mQPelEnable = data[IDX_ENABLE_Q_PEL] & 0x01; + mMeSpeedPreset = kMeSpeedPreset[data[IDX_ME_SPEED_PRESET] % kMeSpeedPresetNum]; + mAirMode = kAirMode[data[IDX_AIR_MODE] % kAirModeNum]; + mDisableDeblockLevel = kDeblkLevel[data[IDX_DISABLE_DEBLOCK_LEVEL] % kDeblkLevelNum]; + mSearchRangeX = data[IDX_SEARCH_RANGE_X]; + mSearchRangeY = data[IDX_SEARCH_RANGE_Y]; + mIInterval = data[IDX_I_INTERVAL] + 1; + mIDRInterval = data[IDX_IDR_INTERVAL] + 1; + mSeiMdcvFlag = data[IDX_SEI_MDCV_FLAG] & 0x01; + mSeiCllFlag = data[IDX_SEI_CLL_FLAG] & 0x01; + mSeiAveFlag = data[IDX_SEI_AVE_FLAG] & 0x01; + mSeiCcvFlag = data[IDX_SEI_CCV_FLAG] & 0x01; + mProfile = kProfile[data[IDX_PROFILE] % kProfileNum]; + mAspectRatioFlag = data[IDX_ASPECT_RATIO_FLAG] & 0x01; + mNalHrdFlag = data[IDX_NAL_HRD_FLAG] & 0x01; + mVclHrdFlag = data[IDX_VCL_HRD_FLAG] & 0x01; + mIsForceIdrEnabled = data[IDX_ENABLE_FORCE_IDR] & 0x01; + mIsDynamicBitRateChangeEnabled = data[IDX_ENABLE_DYNAMIC_BITRATE] & 0x01; + mIsDynamicFrameRateChangeEnabled = data[IDX_ENABLE_DYNAMIC_FRAME_RATE] & 0x01; + mForceIdrInterval = data[IDX_FORCE_IDR_INTERVAL] & 0x07; + mDynamicBitRateInterval = data[IDX_DYNAMIC_BITRATE_INTERVAL] & 0x07; + mDynamicFrameRateInterval = data[IDX_DYNAMIC_FRAME_RATE_INTERVAL] & 0x07; + + mSliceParam = std::min(256u, static_cast(mInputDims.mHeight >> 4)); + mAvcEncLevel = kSupportedLevels[data[IDX_ENC_LEVEL] % kSupportedLevelsNum]; + mSliceMode = kSliceMode[data[IDX_SLICE_MODE] % kSliceModeNum]; + mEnableFastSad = data[IDX_ENABLE_FAST_SAD] & 0x01; + + mEnableRecon = !!(data[IDX_ENABLE_RECON] & 1); + mEnableNaluInfoExport = 
!!(data[IDX_ENABLE_NALU_INFO_EXPORT] & 1); + + isvce_num_mem_rec_ip_t s_num_mem_rec_ip{}; + isvce_num_mem_rec_op_t s_num_mem_rec_op{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_GET_NUM_MEM_REC, ISVCE_CMD_CT_NA}; + + /* Getting Number of MemRecords */ + s_num_mem_rec_ip.s_ive_ip.u4_size = sizeof(isvce_num_mem_rec_ip_t); + s_num_mem_rec_op.s_ive_op.u4_size = sizeof(isvce_num_mem_rec_op_t); + + if(IV_SUCCESS != isvce_api_function(0, &s_num_mem_rec_ip, &s_num_mem_rec_op, &s_api_cmds)) + { + return false; + } + + mNumMemRecords = s_num_mem_rec_op.s_ive_op.u4_num_mem_rec; + mMemRecords.resize(mNumMemRecords); + mMemRecBufs.resize(mNumMemRecords); + + for(auto i = 0u; i < mNumMemRecords; i++) + { + mMemRecords[i].u4_size = sizeof(iv_mem_rec_t); + mMemRecords[i].pv_base = nullptr; + mMemRecords[i].u4_mem_size = 0; + mMemRecords[i].u4_mem_alignment = 0; + mMemRecords[i].e_mem_type = IV_NA_MEM_TYPE; + } + + isvce_fill_mem_rec_ip_t sFillMemRecIp{}; + isvce_fill_mem_rec_op_t sFillMemRecOp{}; + + s_api_cmds = {ISVCE_CMD_FILL_NUM_MEM_REC, ISVCE_CMD_CT_NA}; + + sFillMemRecIp.s_ive_ip.u4_size = sizeof(isvce_fill_mem_rec_ip_t); + sFillMemRecOp.s_ive_op.u4_size = sizeof(isvce_fill_mem_rec_op_t); + + sFillMemRecIp.s_ive_ip.ps_mem_rec = mMemRecords.data(); + sFillMemRecIp.s_ive_ip.u4_num_mem_rec = mNumMemRecords; + sFillMemRecIp.s_ive_ip.u4_max_wd = mInputDims.mWidth; + sFillMemRecIp.s_ive_ip.u4_max_ht = mInputDims.mHeight; + sFillMemRecIp.u4_wd = mInputDims.mWidth; + sFillMemRecIp.u4_ht = mInputDims.mHeight; + sFillMemRecIp.s_ive_ip.u4_max_level = mAvcEncLevel; + sFillMemRecIp.s_ive_ip.e_color_format = mIvVideoColorFormat; + sFillMemRecIp.s_ive_ip.u4_max_ref_cnt = 2; + sFillMemRecIp.s_ive_ip.u4_max_reorder_cnt = 0; + sFillMemRecIp.s_ive_ip.u4_max_srch_rng_x = 256; + sFillMemRecIp.s_ive_ip.u4_max_srch_rng_y = 256; + + sFillMemRecIp.s_svc_inp_params.u1_num_temporal_layers = mNumTemporalLayers; + sFillMemRecIp.s_svc_inp_params.u1_num_spatial_layers = mNumSpatialLayers; + 
sFillMemRecIp.s_svc_inp_params.d_spatial_res_ratio = mSpatialResRatio; + + if(IV_SUCCESS != isvce_api_function(0, &sFillMemRecIp, &sFillMemRecOp, &s_api_cmds)) + { + return false; + } + + if(!initMemRecs()) + { + return false; + } + + /* Codec Instance Creation */ + isvce_init_ip_t sInitIp{}; + isvce_init_op_t sInitOp{}; + + std::vector sMaxBitrates(mNumSpatialLayers, 240000000); + + mCodecCtx = reinterpret_cast(mMemRecords[0].pv_base); + mCodecCtx->u4_size = sizeof(iv_obj_t); + mCodecCtx->pv_fxns = reinterpret_cast(isvce_api_function); + + sInitIp.s_ive_ip.u4_size = sizeof(isvce_init_ip_t); + sInitOp.s_ive_op.u4_size = sizeof(isvce_init_op_t); + + s_api_cmds = {ISVCE_CMD_INIT, ISVCE_CMD_CT_NA}; + + sInitIp.s_ive_ip.u4_num_mem_rec = mNumMemRecords; + sInitIp.s_ive_ip.ps_mem_rec = mMemRecords.data(); + sInitIp.s_ive_ip.u4_max_wd = mInputDims.mWidth; + sInitIp.s_ive_ip.u4_max_ht = mInputDims.mHeight; + sInitIp.u4_wd = mInputDims.mWidth; + sInitIp.u4_ht = mInputDims.mHeight; + + sInitIp.s_ive_ip.u4_max_ref_cnt = 2; + sInitIp.s_ive_ip.u4_max_reorder_cnt = 0; + sInitIp.s_ive_ip.u4_max_level = mAvcEncLevel; + sInitIp.s_ive_ip.e_inp_color_fmt = mIvVideoColorFormat; + + sInitIp.s_ive_ip.u4_enable_recon = mEnableRecon; + sInitIp.s_ive_ip.e_recon_color_fmt = IV_YUV_420P; + sInitIp.b_nalu_info_export_enable = mEnableNaluInfoExport; + sInitIp.s_ive_ip.e_rc_mode = mRCMode; + sInitIp.s_ive_ip.u4_max_framerate = 120000; + sInitIp.pu4_max_bitrate = sMaxBitrates.data(); + sInitIp.s_svc_inp_params.u1_num_temporal_layers = mNumTemporalLayers; + sInitIp.s_svc_inp_params.u1_num_spatial_layers = mNumSpatialLayers; + sInitIp.s_svc_inp_params.d_spatial_res_ratio = mSpatialResRatio; + + sInitIp.s_ive_ip.u4_num_bframes = mBframes; + sInitIp.s_ive_ip.e_content_type = IV_PROGRESSIVE; + sInitIp.s_ive_ip.u4_max_srch_rng_x = 256; + sInitIp.s_ive_ip.u4_max_srch_rng_y = 256; + sInitIp.s_ive_ip.e_slice_mode = mSliceMode; + sInitIp.s_ive_ip.u4_slice_param = mSliceParam; + sInitIp.s_ive_ip.e_arch = 
mArch; + sInitIp.s_ive_ip.e_soc = SOC_GENERIC; + sInitIp.b_use_default_vui = true; + + if(IV_SUCCESS != isvce_api_function(mCodecCtx, &sInitIp, &sInitOp, &s_api_cmds)) + { + delMemRecs(); + + return false; + } + + setDefault(); + setNumCores(); + logVersion(); + getBufInfo(); + setDimensions(); + setFrameRate(); + setIpeParams(); + setBitRate(); + setQp(); + setAirParams(); + setVbvParams(); + setMeParams(); + setGopParams(); + setDeblockParams(); + setProfileParams(); + setEncMode(IVE_ENC_MODE_HEADER); + setVuiParams(); + setSeiMdcvParams(); + setSeiCllParams(); + setSeiAveParams(); + setSeiCcvParams(); + + initEncBufs(); + + return true; +} + +void Codec::setDimensions() +{ + isvce_ctl_set_dimensions_ip_t s_frame_dimensions_ip{}; + isvce_ctl_set_dimensions_op_t s_frame_dimensions_op{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_DIMENSIONS}; + + s_frame_dimensions_ip.s_ive_ip.u4_ht = mInputDims.mHeight; + s_frame_dimensions_ip.s_ive_ip.u4_wd = mInputDims.mWidth; + + s_frame_dimensions_ip.s_ive_ip.u4_timestamp_high = 0; + s_frame_dimensions_ip.s_ive_ip.u4_timestamp_low = 0; + + s_frame_dimensions_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_dimensions_ip_t); + s_frame_dimensions_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_dimensions_op_t); + + isvce_api_function(mCodecCtx, &s_frame_dimensions_ip, &s_frame_dimensions_op, &s_api_cmds); +} + +void Codec::setNumCores() +{ + isvce_ctl_set_num_cores_ip_t sNumCoresIp{}; + isvce_ctl_set_num_cores_op_t sNumCoresOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_NUM_CORES}; + + sNumCoresIp.s_ive_ip.u4_num_cores = mNumCores; + + sNumCoresIp.s_ive_ip.u4_timestamp_high = 0; + sNumCoresIp.s_ive_ip.u4_timestamp_low = 0; + + sNumCoresIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_num_cores_ip_t); + sNumCoresOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_num_cores_op_t); + + isvce_api_function(mCodecCtx, (void *) &sNumCoresIp, (void *) &sNumCoresOp, &s_api_cmds); +} + +void 
Codec::setDefault() +{ + isvce_ctl_setdefault_ip_t sDefaultIp{}; + isvce_ctl_setdefault_op_t sDefaultOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SETDEFAULT}; + + sDefaultIp.s_ive_ip.u4_timestamp_high = 0; + sDefaultIp.s_ive_ip.u4_timestamp_low = 0; + + sDefaultIp.s_ive_ip.u4_size = sizeof(isvce_ctl_setdefault_ip_t); + sDefaultOp.s_ive_op.u4_size = sizeof(isvce_ctl_setdefault_op_t); + + isvce_api_function(mCodecCtx, &sDefaultIp, &sDefaultOp, &s_api_cmds); +} + +void Codec::getBufInfo() +{ + isvce_ctl_getbufinfo_ip_t s_get_buf_info_ip{}; + isvce_ctl_getbufinfo_op_t s_get_buf_info_op{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_GETBUFINFO}; + + s_get_buf_info_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_getbufinfo_ip_t); + s_get_buf_info_op.s_ive_op.u4_size = sizeof(isvce_ctl_getbufinfo_op_t); + + s_get_buf_info_ip.s_ive_ip.u4_max_ht = mInputDims.mHeight; + s_get_buf_info_ip.s_ive_ip.u4_max_wd = mInputDims.mWidth; + s_get_buf_info_ip.s_ive_ip.e_inp_color_fmt = mIvVideoColorFormat; + + isvce_api_function(mCodecCtx, &s_get_buf_info_ip, &s_get_buf_info_op, &s_api_cmds); +} + +void Codec::setFrameRate() +{ + isvce_ctl_set_frame_rate_ip_t sFrameRateIp{}; + isvce_ctl_set_frame_rate_op_t sFrameRateOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_FRAMERATE}; + + sFrameRateIp.s_ive_ip.u4_src_frame_rate = (UWORD32) mFrameRate; + sFrameRateIp.s_ive_ip.u4_tgt_frame_rate = (UWORD32) mFrameRate; + + sFrameRateIp.s_ive_ip.u4_timestamp_high = 0; + sFrameRateIp.s_ive_ip.u4_timestamp_low = 0; + + sFrameRateIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_frame_rate_ip_t); + sFrameRateOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_frame_rate_op_t); + + isvce_api_function(mCodecCtx, &sFrameRateIp, &sFrameRateOp, &s_api_cmds); +} + +void Codec::setIpeParams() +{ + isvce_ctl_set_ipe_params_ip_t sIpeParamsIp{}; + isvce_ctl_set_ipe_params_op_t sIpeParamsOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, 
ISVCE_CMD_CTL_SET_IPE_PARAMS}; + + sIpeParamsIp.s_ive_ip.u4_enable_intra_4x4 = mIntra4x4; + sIpeParamsIp.s_ive_ip.u4_enc_speed_preset = mEncSpeed; + + sIpeParamsIp.s_ive_ip.u4_timestamp_high = 0; + sIpeParamsIp.s_ive_ip.u4_timestamp_low = 0; + + sIpeParamsIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_ipe_params_ip_t); + sIpeParamsOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_ipe_params_op_t); + + isvce_api_function(mCodecCtx, &sIpeParamsIp, &sIpeParamsOp, &s_api_cmds); +} + +void Codec::setBitRate() +{ + isvce_ctl_set_bitrate_ip_t sBitrateIp{}; + isvce_ctl_set_bitrate_op_t sBitrateOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_BITRATE}; + std::vector sTargetBitrates(mNumSpatialLayers, mBitrate); + + sBitrateIp.pu4_target_bitrate = sTargetBitrates.data(); + + sBitrateIp.s_ive_ip.u4_timestamp_high = 0; + sBitrateIp.s_ive_ip.u4_timestamp_low = 0; + + sBitrateIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_bitrate_ip_t); + sBitrateOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_bitrate_op_t); + + isvce_api_function(mCodecCtx, &sBitrateIp, &sBitrateOp, &s_api_cmds); +} + +void Codec::setFrameType(IV_PICTURE_CODING_TYPE_T eFrameType) +{ + isvce_ctl_set_frame_type_ip_t sFrameTypeIp{}; + isvce_ctl_set_frame_type_op_t sFrameTypeOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_FRAMETYPE}; + + sFrameTypeIp.s_ive_ip.e_frame_type = eFrameType; + + sFrameTypeIp.s_ive_ip.u4_timestamp_high = 0; + sFrameTypeIp.s_ive_ip.u4_timestamp_low = 0; + + sFrameTypeIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_frame_type_ip_t); + sFrameTypeOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_frame_type_op_t); + + isvce_api_function(mCodecCtx, &sFrameTypeIp, &sFrameTypeOp, &s_api_cmds); +} + +void Codec::setQp() +{ + constexpr UWORD8 u1NumSliceTypes = 3; + isvce_ctl_set_qp_ip_t s_QpIp{}; + isvce_ctl_set_qp_op_t s_QpOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_QP}; + std::vector sQps(u1NumSliceTypes * mNumSpatialLayers); + std::vector 
sMinQps(u1NumSliceTypes * mNumSpatialLayers); + std::vector sMaxQps(u1NumSliceTypes * mNumSpatialLayers); + + s_QpIp.pu4_i_qp = sQps.data(); + s_QpIp.pu4_i_qp_min = sMinQps.data(); + s_QpIp.pu4_i_qp_max = sMaxQps.data(); + + s_QpIp.pu4_p_qp = sQps.data() + mNumSpatialLayers; + s_QpIp.pu4_p_qp_min = sMinQps.data() + mNumSpatialLayers; + s_QpIp.pu4_p_qp_max = sMaxQps.data() + mNumSpatialLayers; + + s_QpIp.pu4_b_qp = sQps.data() + mNumSpatialLayers * 2; + s_QpIp.pu4_b_qp_min = sMinQps.data() + mNumSpatialLayers * 2; + s_QpIp.pu4_b_qp_max = sMaxQps.data() + mNumSpatialLayers * 2; + + for(auto i = 0; i < mNumSpatialLayers; i++) + { + s_QpIp.pu4_i_qp[i] = m_I_QP; + s_QpIp.pu4_i_qp_max[i] = kMaxQP; + s_QpIp.pu4_i_qp_min[i] = kMinQP; + + s_QpIp.pu4_p_qp[i] = m_P_QP; + s_QpIp.pu4_p_qp_max[i] = kMaxQP; + s_QpIp.pu4_p_qp_min[i] = kMinQP; + + s_QpIp.pu4_b_qp[i] = m_B_QP; + s_QpIp.pu4_b_qp_max[i] = kMaxQP; + s_QpIp.pu4_b_qp_min[i] = kMinQP; + } + + s_QpIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_qp_ip_t); + s_QpOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_qp_op_t); + + isvce_api_function(mCodecCtx, &s_QpIp, &s_QpOp, &s_api_cmds); +} + +void Codec::setEncMode(IVE_ENC_MODE_T eEncMode) +{ + isvce_ctl_set_enc_mode_ip_t sEncModeIp{}; + isvce_ctl_set_enc_mode_op_t sEncModeOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_ENC_MODE}; + + sEncModeIp.s_ive_ip.e_enc_mode = eEncMode; + + sEncModeIp.s_ive_ip.u4_timestamp_high = 0; + sEncModeIp.s_ive_ip.u4_timestamp_low = 0; + + sEncModeIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_enc_mode_ip_t); + sEncModeOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_enc_mode_op_t); + + isvce_api_function(mCodecCtx, &sEncModeIp, &sEncModeOp, &s_api_cmds); +} + +void Codec::setVbvParams() +{ + isvce_ctl_set_vbv_params_ip_t sVbvIp{}; + isvce_ctl_set_vbv_params_op_t sVbvOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_VBV_PARAMS}; + std::vector sBufferDelays(mNumSpatialLayers, 1000); + + 
sVbvIp.pu4_vbv_buffer_delay = sBufferDelays.data(); + + sVbvIp.s_ive_ip.u4_timestamp_high = 0; + sVbvIp.s_ive_ip.u4_timestamp_low = 0; + + sVbvIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_vbv_params_ip_t); + sVbvOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_vbv_params_op_t); + + isvce_api_function(mCodecCtx, &sVbvIp, &sVbvOp, &s_api_cmds); +} + +void Codec::setAirParams() +{ + isvce_ctl_set_air_params_ip_t sAirIp{}; + isvce_ctl_set_air_params_op_t sAirOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_AIR_PARAMS}; + + sAirIp.s_ive_ip.e_air_mode = mAirMode; + sAirIp.s_ive_ip.u4_air_refresh_period = mIntraRefresh; + + sAirIp.s_ive_ip.u4_timestamp_high = 0; + sAirIp.s_ive_ip.u4_timestamp_low = 0; + + sAirIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_air_params_ip_t); + sAirOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_air_params_op_t); + + isvce_api_function(mCodecCtx, &sAirIp, &sAirOp, &s_api_cmds); +} + +void Codec::setMeParams() +{ + isvce_ctl_set_me_params_ip_t sMeParamsIp{}; + isvce_ctl_set_me_params_op_t sMeParamsOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_ME_PARAMS}; + + sMeParamsIp.s_ive_ip.u4_enable_fast_sad = mEnableFastSad; + sMeParamsIp.s_ive_ip.u4_enable_alt_ref = mEnableAltRef; + + sMeParamsIp.s_ive_ip.u4_enable_hpel = mHalfPelEnable; + sMeParamsIp.s_ive_ip.u4_enable_qpel = mQPelEnable; + sMeParamsIp.s_ive_ip.u4_me_speed_preset = mMeSpeedPreset; + sMeParamsIp.s_ive_ip.u4_srch_rng_x = mSearchRangeX; + sMeParamsIp.s_ive_ip.u4_srch_rng_y = mSearchRangeY; + + sMeParamsIp.s_ive_ip.u4_timestamp_high = 0; + sMeParamsIp.s_ive_ip.u4_timestamp_low = 0; + + sMeParamsIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_me_params_ip_t); + sMeParamsOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_me_params_op_t); + + isvce_api_function(mCodecCtx, &sMeParamsIp, &sMeParamsOp, &s_api_cmds); +} + +void Codec::setGopParams() +{ + isvce_ctl_set_gop_params_ip_t sGopParamsIp{}; + isvce_ctl_set_gop_params_op_t sGopParamsOp{}; + + 
isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_GOP_PARAMS}; + + sGopParamsIp.s_ive_ip.u4_i_frm_interval = mIInterval; + sGopParamsIp.s_ive_ip.u4_idr_frm_interval = mIDRInterval; + + sGopParamsIp.s_ive_ip.u4_timestamp_high = 0; + sGopParamsIp.s_ive_ip.u4_timestamp_low = 0; + + sGopParamsIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_gop_params_ip_t); + sGopParamsOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_gop_params_op_t); + + isvce_api_function(mCodecCtx, &sGopParamsIp, &sGopParamsOp, &s_api_cmds); +} + +void Codec::setProfileParams() +{ + isvce_ctl_set_profile_params_ip_t sProfileParamsIp{}; + isvce_ctl_set_profile_params_op_t sProfileParamsOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_PROFILE_PARAMS}; + + sProfileParamsIp.s_ive_ip.e_profile = mProfile; + if(sProfileParamsIp.s_ive_ip.e_profile == IV_PROFILE_BASE) + { + sProfileParamsIp.s_ive_ip.u4_entropy_coding_mode = 0; + } + else + { + sProfileParamsIp.s_ive_ip.u4_entropy_coding_mode = 1; + } + + sProfileParamsIp.s_ive_ip.u4_timestamp_high = 0; + sProfileParamsIp.s_ive_ip.u4_timestamp_low = 0; + + sProfileParamsIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_profile_params_ip_t); + sProfileParamsOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_profile_params_op_t); + + isvce_api_function(mCodecCtx, &sProfileParamsIp, &sProfileParamsOp, &s_api_cmds); +} + +void Codec::setDeblockParams() +{ + isvce_ctl_set_deblock_params_ip_t sDeblockParamsIp{}; + isvce_ctl_set_deblock_params_op_t sDeblockParamsOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_DEBLOCK_PARAMS}; + + sDeblockParamsIp.s_ive_ip.u4_disable_deblock_level = mDisableDeblockLevel; + + sDeblockParamsIp.s_ive_ip.u4_timestamp_high = 0; + sDeblockParamsIp.s_ive_ip.u4_timestamp_low = 0; + + sDeblockParamsIp.s_ive_ip.u4_size = sizeof(isvce_ctl_set_deblock_params_ip_t); + sDeblockParamsOp.s_ive_op.u4_size = sizeof(isvce_ctl_set_deblock_params_op_t); + + isvce_api_function(mCodecCtx, &sDeblockParamsIp, 
&sDeblockParamsOp, &s_api_cmds); +} + +void Codec::setVuiParams() +{ + isvce_vui_ip_t sVuiParamsIp{}; + isvce_vui_op_t sVuiParamsOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_VUI_PARAMS}; + + sVuiParamsIp.u1_aspect_ratio_info_present_flag = mAspectRatioFlag; + sVuiParamsIp.u1_aspect_ratio_idc = 0; + sVuiParamsIp.u2_sar_width = 0; + sVuiParamsIp.u2_sar_height = 0; + sVuiParamsIp.u1_overscan_info_present_flag = 0; + sVuiParamsIp.u1_overscan_appropriate_flag = 0; + sVuiParamsIp.u1_video_signal_type_present_flag = 1; + sVuiParamsIp.u1_video_format = 0; + sVuiParamsIp.u1_video_full_range_flag = 0; + sVuiParamsIp.u1_colour_description_present_flag = 0; + sVuiParamsIp.u1_colour_primaries = 0; + sVuiParamsIp.u1_transfer_characteristics = 0; + sVuiParamsIp.u1_matrix_coefficients = 0; + sVuiParamsIp.u1_chroma_loc_info_present_flag = 0; + sVuiParamsIp.u1_chroma_sample_loc_type_top_field = 0; + sVuiParamsIp.u1_chroma_sample_loc_type_bottom_field = 0; + sVuiParamsIp.u1_vui_timing_info_present_flag = 0; + sVuiParamsIp.u4_vui_num_units_in_tick = 0; + sVuiParamsIp.u4_vui_time_scale = 0; + sVuiParamsIp.u1_fixed_frame_rate_flag = 0; + sVuiParamsIp.u1_nal_hrd_parameters_present_flag = mNalHrdFlag; + sVuiParamsIp.u1_vcl_hrd_parameters_present_flag = mVclHrdFlag; + sVuiParamsIp.u1_low_delay_hrd_flag = 0; + sVuiParamsIp.u1_pic_struct_present_flag = 0; + sVuiParamsIp.u1_bitstream_restriction_flag = 0; + sVuiParamsIp.u1_motion_vectors_over_pic_boundaries_flag = 0; + sVuiParamsIp.u1_max_bytes_per_pic_denom = 0; + sVuiParamsIp.u1_max_bits_per_mb_denom = 0; + sVuiParamsIp.u1_log2_max_mv_length_horizontal = 0; + sVuiParamsIp.u1_log2_max_mv_length_vertical = 0; + sVuiParamsIp.u1_num_reorder_frames = 0; + sVuiParamsIp.u1_max_dec_frame_buffering = 0; + + sVuiParamsIp.u4_size = sizeof(isvce_vui_ip_t); + sVuiParamsOp.u4_size = sizeof(isvce_vui_op_t); + + isvce_api_function(mCodecCtx, &sVuiParamsIp, &sVuiParamsOp, &s_api_cmds); +} + +void Codec::setSeiMdcvParams() +{ 
+ isvce_ctl_set_sei_mdcv_params_ip_t sSeiMdcvParamsIp{}; + isvce_ctl_set_sei_mdcv_params_op_t sSeiMdcvParamsOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_SEI_MDCV_PARAMS}; + + sSeiMdcvParamsIp.u1_sei_mdcv_params_present_flag = mSeiMdcvFlag; + if(mSeiMdcvFlag) + { + for(int i4_count = 0; i4_count < kNumSeiMdcvPrimaries; ++i4_count) + { + sSeiMdcvParamsIp.au2_display_primaries_x[i4_count] = 30000; + sSeiMdcvParamsIp.au2_display_primaries_y[i4_count] = 35000; + } + sSeiMdcvParamsIp.u2_white_point_x = 30000; + sSeiMdcvParamsIp.u2_white_point_y = 35000; + sSeiMdcvParamsIp.u4_max_display_mastering_luminance = 100000000; + sSeiMdcvParamsIp.u4_min_display_mastering_luminance = 50000; + } + + sSeiMdcvParamsIp.u4_timestamp_high = 0; + sSeiMdcvParamsIp.u4_timestamp_low = 0; + + sSeiMdcvParamsIp.u4_size = sizeof(isvce_ctl_set_sei_mdcv_params_ip_t); + sSeiMdcvParamsOp.u4_size = sizeof(isvce_ctl_set_sei_mdcv_params_op_t); + + isvce_api_function(mCodecCtx, &sSeiMdcvParamsIp, &sSeiMdcvParamsOp, &s_api_cmds); +} + +void Codec::setSeiCllParams() +{ + isvce_ctl_set_sei_cll_params_ip_t sSeiCllParamsIp{}; + isvce_ctl_set_sei_cll_params_op_t sSeiCllParamsOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_SEI_CLL_PARAMS}; + + sSeiCllParamsIp.u1_sei_cll_params_present_flag = mSeiCllFlag; + + if(mSeiCllFlag) + { + sSeiCllParamsIp.u2_max_content_light_level = 0; + sSeiCllParamsIp.u2_max_pic_average_light_level = 0; + } + + sSeiCllParamsIp.u4_timestamp_high = 0; + sSeiCllParamsIp.u4_timestamp_low = 0; + + sSeiCllParamsIp.u4_size = sizeof(isvce_ctl_set_sei_cll_params_ip_t); + sSeiCllParamsOp.u4_size = sizeof(isvce_ctl_set_sei_cll_params_op_t); + + isvce_api_function(mCodecCtx, &sSeiCllParamsIp, &sSeiCllParamsOp, &s_api_cmds); +} + +void Codec::setSeiAveParams() +{ + isvce_ctl_set_sei_ave_params_ip_t sSeiAveParamsIp{}; + isvce_ctl_set_sei_ave_params_op_t sSeiAveParamsOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, 
ISVCE_CMD_CTL_SET_SEI_AVE_PARAMS}; + + sSeiAveParamsIp.u1_sei_ave_params_present_flag = mSeiAveFlag; + + if(mSeiAveFlag) + { + sSeiAveParamsIp.u4_ambient_illuminance = 1; + sSeiAveParamsIp.u2_ambient_light_x = 0; + sSeiAveParamsIp.u2_ambient_light_y = 0; + } + + sSeiAveParamsIp.u4_timestamp_high = 0; + sSeiAveParamsIp.u4_timestamp_low = 0; + + sSeiAveParamsIp.u4_size = sizeof(isvce_ctl_set_sei_ave_params_ip_t); + sSeiAveParamsOp.u4_size = sizeof(isvce_ctl_set_sei_ave_params_op_t); + + isvce_api_function(mCodecCtx, &sSeiAveParamsIp, &sSeiAveParamsOp, &s_api_cmds); +} + +void Codec::setSeiCcvParams() +{ + isvce_ctl_set_sei_ccv_params_ip_t sSeiCcvParamsIp{}; + isvce_ctl_set_sei_ccv_params_op_t sSeiCcvParamsOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_SEI_CCV_PARAMS}; + + sSeiCcvParamsIp.u1_sei_ccv_params_present_flag = mSeiCcvFlag; + + if(mSeiCcvFlag) + { + sSeiCcvParamsIp.u1_ccv_cancel_flag = 0; + sSeiCcvParamsIp.u1_ccv_persistence_flag = 1; + sSeiCcvParamsIp.u1_ccv_primaries_present_flag = 1; + sSeiCcvParamsIp.u1_ccv_min_luminance_value_present_flag = 1; + sSeiCcvParamsIp.u1_ccv_max_luminance_value_present_flag = 1; + sSeiCcvParamsIp.u1_ccv_avg_luminance_value_present_flag = 1; + sSeiCcvParamsIp.u1_ccv_reserved_zero_2bits = 0; + for(int i4_count = 0; i4_count < kNumSeiCcvPrimaries; ++i4_count) + { + sSeiCcvParamsIp.ai4_ccv_primaries_x[i4_count] = 1; + sSeiCcvParamsIp.ai4_ccv_primaries_y[i4_count] = 1; + } + sSeiCcvParamsIp.u4_ccv_min_luminance_value = 1; + sSeiCcvParamsIp.u4_ccv_max_luminance_value = 1; + sSeiCcvParamsIp.u4_ccv_avg_luminance_value = 1; + } + + sSeiCcvParamsIp.u4_timestamp_high = 0; + sSeiCcvParamsIp.u4_timestamp_low = 0; + + sSeiCcvParamsIp.u4_size = sizeof(isvce_ctl_set_sei_ccv_params_ip_t); + sSeiCcvParamsOp.u4_size = sizeof(isvce_ctl_set_sei_ccv_params_op_t); + + isvce_api_function(mCodecCtx, &sSeiCcvParamsIp, &sSeiCcvParamsOp, &s_api_cmds); +} + +void Codec::logVersion() +{ + isvce_ctl_getversioninfo_ip_t 
s_ctl_set_getversioninfo_ip{}; + isvce_ctl_getversioninfo_op_t s_ctl_set_getversioninfo_op{}; + + CHAR ac_version_string[512]; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_GETVERSION}; + + s_ctl_set_getversioninfo_ip.s_ive_ip.pu1_version = (UWORD8 *) ac_version_string; + s_ctl_set_getversioninfo_ip.s_ive_ip.u4_version_bufsize = sizeof(ac_version_string); + s_ctl_set_getversioninfo_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_getversioninfo_ip_t); + s_ctl_set_getversioninfo_op.s_ive_op.u4_size = sizeof(isvce_ctl_getversioninfo_op_t); + + isvce_api_function(mCodecCtx, (void *) &s_ctl_set_getversioninfo_ip, + (void *) &s_ctl_set_getversioninfo_op, &s_api_cmds); +} + +bool Codec::encodeFrames(const UWORD8 *data, size_t size) +{ + isvce_video_encode_ip_t sEncodeIp{}; + isvce_video_encode_op_t sEncodeOp{}; + + isvce_api_cmds_t s_api_cmds{ISVCE_CMD_VIDEO_ENCODE, ISVCE_CMD_CT_NA}; + iv_raw_buf_t *psInpRawBuf = &sEncodeIp.s_ive_ip.s_inp_buf; + iv_raw_buf_t *psRecRawBuf = &sEncodeIp.s_ive_ip.s_recon_buf; + + size_t frameSize = mInputDims.getFrameSize(); + auto bytesLeft = std::min(size, frameSize); + auto bytesConsumed = 0; + UWORD32 numFrames = 0; + + sEncodeIp.s_ive_ip.s_out_buf.pv_buf = mEncBufs.mOutputBuf.data(); + sEncodeIp.s_ive_ip.s_out_buf.u4_bytes = 0; + sEncodeIp.s_ive_ip.s_out_buf.u4_bufsize = static_cast(mEncBufs.mOutputBuf.size()); + sEncodeOp.s_ive_op.s_out_buf.pv_buf = nullptr; + sEncodeIp.s_ive_ip.pv_bufs = nullptr; + sEncodeIp.s_ive_ip.pv_mb_info = nullptr; + sEncodeIp.s_ive_ip.pv_pic_info = nullptr; + sEncodeIp.s_ive_ip.u4_mb_info_type = 0; + sEncodeIp.s_ive_ip.u4_pic_info_type = 0; + sEncodeIp.s_ive_ip.u4_is_last = 0; + + sEncodeIp.s_ive_ip.u4_timestamp_high = 0; + sEncodeIp.s_ive_ip.u4_timestamp_low = 0; + + memset(psInpRawBuf, 0, sizeof(iv_raw_buf_t)); + psInpRawBuf->u4_size = sizeof(iv_raw_buf_t); + psInpRawBuf->e_color_fmt = mIvVideoColorFormat; + + sEncodeIp.s_ive_ip.u4_size = sizeof(isvce_video_encode_ip_t); + 
sEncodeOp.s_ive_op.u4_size = sizeof(isvce_video_encode_op_t); + + isvce_api_function(mCodecCtx, &sEncodeIp, &sEncodeOp, &s_api_cmds); + + if(mEnableNaluInfoExport) + { + sEncodeIp.ps_nalu_info_buf = mEncBufs.mNaluInfoStructBuf.data(); + sEncodeOp.ps_nalu_info_buf = mEncBufs.mNaluInfoStructBuf.data() + mNumSpatialLayers; + } + + while(!sEncodeOp.s_ive_op.u4_is_last && (kMaxEncodeCalls > (mNumSpatialLayers * numFrames))) + { + if(mEnableRecon) + { + setEncParams(psRecRawBuf, mEncBufs.mReconBuf, mInputDims); + } + + if(mEnableNaluInfoExport) + { + for(auto i = 0; i < mNumSpatialLayers; i++) + { + sEncodeIp.ps_nalu_info_buf[i].pu1_buf = mEncBufs.mNaluInfoDataBuf[i].data(); + sEncodeIp.ps_nalu_info_buf[i].u4_num_bytes = 0; + sEncodeIp.ps_nalu_info_buf[i].u4_buf_size = + static_cast(mEncBufs.mNaluInfoDataBuf[i].size()); + } + } + + if(size > 0) + { + bytesLeft = std::min(size, frameSize); + std::copy(data, data + bytesLeft, mEncBufs.mInputBuf.begin()); + std::fill(std::next(mEncBufs.mInputBuf.begin(), bytesLeft), mEncBufs.mInputBuf.end(), + data[0]); + setEncParams(psInpRawBuf, mEncBufs.mInputBuf, mInputDims, mIvVideoColorFormat); + + bytesConsumed = bytesLeft; + } + else + { + sEncodeIp.s_ive_ip.u4_is_last = 1; + + for(auto i = 0; i < 3; i++) + { + psInpRawBuf->apv_bufs[i] = nullptr; + } + + bytesConsumed = 0; + } + + if(mIsForceIdrEnabled && !sEncodeIp.s_ive_ip.u4_is_last) + { + if(numFrames == mForceIdrInterval) + { + setFrameType(IV_IDR_FRAME); + } + } + + if(mIsDynamicBitRateChangeEnabled && !sEncodeIp.s_ive_ip.u4_is_last) + { + if(numFrames == mDynamicBitRateInterval) + { + if(data[0] & 0x01) + { + mBitrate *= 2; + } + else + { + mBitrate /= 2; + } + + setBitRate(); + } + } + + if(mIsDynamicFrameRateChangeEnabled && !sEncodeIp.s_ive_ip.u4_is_last) + { + if(numFrames == mDynamicFrameRateInterval) + { + if(size > 1 && data[1] & 0x01) + { + mFrameRate *= 2; + } + else + { + mFrameRate /= 2; + } + + setFrameRate(); + } + } + + isvce_api_function(mCodecCtx, &sEncodeIp, 
&sEncodeOp, &s_api_cmds); + + if(!sEncodeOp.s_ive_op.u4_is_last) + { + numFrames++; + data += bytesConsumed; + size -= bytesConsumed; + } + } + + return true; +} + +void Codec::setEncParams(iv_raw_buf_t *psInpRawBuf, std::vector &buf, const FrameDims &dims, + IV_COLOR_FORMAT_T colorFormat) +{ + switch(colorFormat) + { + case IV_YUV_420SP_UV: + case IV_YUV_420SP_VU: + { + WORD32 yStride = dims.mWidth; + WORD32 uStride = dims.mWidth / 2; + + psInpRawBuf->apv_bufs[0] = buf.data(); + psInpRawBuf->apv_bufs[1] = buf.data() + dims.mWidth * dims.mHeight; + + psInpRawBuf->au4_wd[0] = dims.mWidth; + psInpRawBuf->au4_wd[1] = dims.mWidth; + + psInpRawBuf->au4_ht[0] = dims.mHeight; + psInpRawBuf->au4_ht[1] = dims.mHeight / 2; + + psInpRawBuf->au4_strd[0] = yStride; + psInpRawBuf->au4_strd[1] = uStride; + + break; + } + default: + { + WORD32 yStride = dims.mWidth; + WORD32 uStride = dims.mWidth / 2; + WORD32 vStride = dims.mWidth / 2; + + psInpRawBuf->apv_bufs[0] = buf.data(); + psInpRawBuf->apv_bufs[1] = buf.data() + dims.mWidth * dims.mHeight; + psInpRawBuf->apv_bufs[2] = buf.data() + (dims.mWidth * dims.mHeight * 5) / 4; + + psInpRawBuf->au4_wd[0] = dims.mWidth; + psInpRawBuf->au4_wd[1] = dims.mWidth / 2; + psInpRawBuf->au4_wd[2] = dims.mWidth / 2; + + psInpRawBuf->au4_ht[0] = dims.mHeight; + psInpRawBuf->au4_ht[1] = dims.mHeight / 2; + psInpRawBuf->au4_ht[2] = dims.mHeight / 2; + + psInpRawBuf->au4_strd[0] = yStride; + psInpRawBuf->au4_strd[1] = uStride; + psInpRawBuf->au4_strd[2] = vStride; + + break; + } + } +} + +extern "C" int LLVMFuzzerTestOneInput(const UWORD8 *data, size_t size) +{ + if(size < IDX_LAST) + { + return 0; + } + + std::unique_ptr codec = std::make_unique(); + + if(codec->initEncoder(data)) + { + codec->encodeFrames(data, size); + } + + return 0; +} diff --git a/libavc_blocklist.txt b/libavc_blocklist.txt index 30a2cef..1da77c6 100644 --- a/libavc_blocklist.txt +++ b/libavc_blocklist.txt @@ -27,6 +27,7 @@ fun:irc_initialise_rate_control # Numerous 
overflows in multiple functions, CAVLC is a compression technique. src:*/decoder/ih264d_parse_cavlc.c src:*/encoder/ih264e_cavlc.c +src:*/encoder/svc/isvce_cavlc.c # Performance related fun:ih264e_pack_c_mb diff --git a/test/Android.bp b/test/Android.bp index 61a9e26..304fba5 100644 --- a/test/Android.bp +++ b/test/Android.bp @@ -27,6 +27,25 @@ cc_defaults { }, } +cc_defaults { + name: "libavc_enc_app_defaults", + gtest: false, + host_supported:true, + cflags: [ + "-DPROFILE_ENABLE", + "-DARM", + "-DMD5_DISABLE", + "-fPIC", + "-Wall", + "-Werror", + "-Wno-unused-variable", + ], + local_include_dirs: [ + "encoder/", + ], + static_libs: ["libavcenc"], +} + cc_test { name: "avcdec", defaults: ["libavc_mvc_app_defaults"], @@ -51,21 +70,8 @@ cc_test { cc_test { name: "avcenc", - gtest: false, - host_supported: true, + defaults: ["libavc_enc_app_defaults"], - cflags: [ - "-DPROFILE_ENABLE", - "-DARM", - "-DMD5_DISABLE", - "-fPIC", - "-Wall", - "-Werror", - "-Wno-unused-variable", - ], - local_include_dirs: [ - "encoder/", - ], srcs: [ "encoder/main.c", "encoder/psnr.c", @@ -73,5 +79,25 @@ cc_test { "encoder/output.c", "encoder/recon.c", ], - static_libs: ["libavcenc"], +} + +cc_test { + name: "svcenc", + defaults: ["libavc_enc_app_defaults"], + + local_include_dirs: [ + "svcenc/", + ], + + srcs: [ + "svcenc/main.c", + "svcenc/input.c", + "svcenc/output.c", + "svcenc/psnr.c", + "svcenc/recon.c", + ], + + static_libs: [ + "libsvcenc", + ], } diff --git a/test/svcenc/app.h b/test/svcenc/app.h new file mode 100644 index 0000000..682557c --- /dev/null +++ b/test/svcenc/app.h @@ -0,0 +1,417 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/*****************************************************************************/ +/* */ +/* File Name : app.h */ +/* */ +/* Description : This file contains all the necessary structure and */ +/* enumeration definitions needed for the Application */ +/* */ +/* List of Functions : */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 26 08 2010 Ittiam Draft */ +/* */ +/*****************************************************************************/ + +#ifndef _SVCE_APP_H_ +#define _SVCE_APP_H_ + +#include +#include + +#include "iv2.h" +#include "ive2.h" + +/*****************************************************************************/ +/* Function Macros (fully parenthesised; arguments are evaluated twice) */ +/*****************************************************************************/ +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MIN(a, b) (((a) < (b)) ? 
(a) : (b)) + +#define ALIGN16(x) ((((x) + 15) >> 4) << 4) + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +#define DEFAULT_NUM_INPUT_BUFS 32 +#define DEFAULT_MAX_INPUT_BUFS 32 + +#define DEFAULT_NUM_OUTPUT_BUFS 32 +#define DEFAULT_MAX_OUTPUT_BUFS 32 + +#define DEFAULT_NUM_RECON_BUFS 32 +#define DEFAULT_MAX_RECON_BUFS DEFAULT_NUM_RECON_BUFS + +#define DEFAULT_NUM_NALU_INFO_BUFS 32 +#define DEFAULT_MAX_NALU_INFO_BUFS 32 + +#define LEN_STATUS_BUFFER (10 * 1024) +#define MAX_VBV_BUFF_SIZE (120 * 16384) +#define MAX_NUM_IO_BUFS 3 + +#define DEFAULT_MAX_REF_FRM 2 +#define DEFAULT_MAX_REORDER_FRM 0 +#define DEFAULT_QP_MIN 4 +#define DEFAULT_QP_MAX 51 +#define DEFAULT_MAX_BITRATE 240000000 +#define DEFAULT_NUM_BFRAMES 0 +#define DEFAULT_MAX_SRCH_RANGE_X 256 +#define DEFAULT_MAX_SRCH_RANGE_Y 256 +#define DEFAULT_MAX_FRAMERATE 120000 +#define DEFAULT_NUM_CORES 1 +#define DEFAULT_NUM_CORES_PRE_ENC 0 +#define DEFAULT_FPS 30 +#define DEFAULT_ENC_SPEED 100 +#define DEFAULT_MEM_REC_CNT 0 +#define DEFAULT_RECON_ENABLE 0 +#define DEFAULT_NALU_INFO_EXPORT_ENABLE 0 +#define DEFAULT_CHKSUM_ENABLE 0 +#define DEFAULT_START_FRM 0 +#define DEFAULT_NUM_FRMS 100 +#define DEFAULT_INP_COLOR_FMT IV_YUV_420P +#define DEFAULT_RECON_COLOR_FMT IV_YUV_420P +#define DEFAULT_LOOPBACK 0 +#define DEFAULT_SRC_FRAME_RATE 50 +#define DEFAULT_TGT_FRAME_RATE 50 +#define DEFAULT_MAX_WD 1920 +#define DEFAULT_MAX_HT 1920 +#define DEFAULT_MAX_LEVEL 51 +#define DEFAULT_STRIDE 0 +#define DEFAULT_WD 1920 +#define DEFAULT_HT 1080 +#define DEFAULT_PSNR_ENABLE 0 +#define DEFAULT_ME_SPEED 100 +#define DEFAULT_ENABLE_FAST_SAD 0 +#define DEFAULT_ENABLE_ALT_REF 0 +#define DEFAULT_RC 0 +#define DEFAULT_BITRATE 6000000 +#define DEFAULT_I_QP 25 +#define DEFAULT_I_QP_MAX DEFAULT_QP_MAX +#define DEFAULT_I_QP_MIN 0 +#define DEFAULT_P_QP 28 +#define DEFAULT_P_QP_MAX DEFAULT_QP_MAX +#define 
DEFAULT_P_QP_MIN 0 +#define DEFAULT_B_QP 28 +#define DEFAULT_B_QP_MAX DEFAULT_QP_MAX +#define DEFAULT_B_QP_MIN 0 +#define DEFAULT_AIR 0 +#define DEFAULT_AIR_REFRESH_PERIOD 30 +#define DEFAULT_SRCH_RNG_X 64 +#define DEFAULT_SRCH_RNG_Y 48 +#define DEFAULT_I_INTERVAL 50 +#define DEFAULT_IDR_INTERVAL 100 +#define DEFAULT_B_FRAMES 0 +#define DEFAULT_DISABLE_DEBLK_LEVEL 4 +#define DEFAULT_HPEL 1 +#define DEFAULT_QPEL 1 +#define DEFAULT_I4 1 +#define DEFAULT_EPROFILE IV_PROFILE_BASE +#define DEFAULT_SLICE_MODE 0 +#define DEFAULT_SLICE_PARAM 256 +#define DEFAULT_ENTROPY_CODING_MODE 1 +#define DEFAULT_NUM_TEMPORAL_LAYERS 1 +#define DEFAULT_NUM_SPATIAL_LAYERS 1 +#define DEFAULT_SPATIAL_RES_RATIO 2.0 + +#define DEFAULT_MAX_DISPLAY_MASTERING_LUMINANCE 50000 +#define DEFAULT_MIN_DISPLAY_MASTERING_LUMINANCE 1 + +#define STRLENGTH 500 + +/* specifies the number of colour primary components of the mastering + display */ +#define NUM_SEI_MDCV_PRIMARIES 3 + +/* specifies the number of colour primary components of the nominal + content colour volume */ +#define NUM_SEI_CCV_PRIMARIES 3 + +/*****************************************************************************/ +/* profile Macros */ +/*****************************************************************************/ +#ifdef PROFILE_ENABLE +#ifdef WINDOWS +typedef LARGE_INTEGER TIMER; +#else +typedef struct timeval TIMER; +#endif +#else +typedef int32_t TIMER; +#endif + +#ifdef PROFILE_ENABLE +#ifdef WINDOWS +#define GETTIME(timer) QueryPerformanceCounter(timer); +#else +#define GETTIME(timer) gettimeofday(timer, NULL); +#endif + +#ifdef WINDOWS +#define ELAPSEDTIME(s_start_timer, s_end_timer, s_elapsed_time, frequency) \ + { \ + TIMER s_temp_time; \ + s_temp_time.LowPart = s_end_timer.LowPart - s_start_timer.LowPart; \ + s_elapsed_time = \ + (UWORD32) (((DOUBLE) s_temp_time.LowPart / (DOUBLE) frequency.LowPart) * 1000000); \ + } +#else +#define ELAPSEDTIME(s_start_timer, s_end_timer, s_elapsed_time, frequency) \ + s_elapsed_time = 
((s_end_timer.tv_sec - s_start_timer.tv_sec) * 1000000) + \ + (s_end_timer.tv_usec - s_start_timer.tv_usec); +#endif +#else +#define GETTIME(timer) +#define ELAPSEDTIME(s_start_timer, s_end_timer, s_elapsed_time, frequency) +#endif + +/*****************************************************************************/ +/* Structure definitions */ +/*****************************************************************************/ +typedef struct +{ + UWORD8 *pu1_buf; + UWORD32 u4_buf_size; + UWORD32 u4_timestamp_low; + UWORD32 u4_timestamp_high; + UWORD32 u4_is_free; + void *pv_mb_info; + void *pv_pic_info; +} input_buf_t; + +typedef struct +{ + UWORD8 *pu1_buf; + UWORD32 u4_buf_size; + UWORD32 u4_timestamp_low; + UWORD32 u4_timestamp_high; + UWORD32 u4_is_free; +} output_buf_t; + +typedef struct +{ + UWORD8 *pu1_buf; + UWORD32 u4_buf_size; + UWORD32 u4_timestamp_low; + UWORD32 u4_timestamp_high; + UWORD32 u4_is_free; +} recon_buf_t; + +typedef struct nalu_info_buf_t +{ + UWORD8 *pu1_buf; + + UWORD32 u4_buf_size; + + bool b_is_free; +} nalu_info_buf_t; + +typedef struct +{ + iv_obj_t *ps_enc; + iv_mem_rec_t *ps_mem_rec; + UWORD32 u4_num_mem_rec; + UWORD32 u4_recon_enable; + UWORD32 u4_chksum_enable; + UWORD32 u4_nalu_info_export_enable; + UWORD32 u4_mb_info_type; + UWORD32 u4_pic_info_type; + UWORD32 u4_mb_info_size; + UWORD32 u4_pic_info_size; + UWORD32 u4_start_frm; + UWORD32 u4_max_num_frms; + UWORD32 u4_total_bytes; + UWORD32 u4_pics_cnt; + IV_COLOR_FORMAT_T e_inp_color_fmt; + IV_COLOR_FORMAT_T e_recon_color_fmt; + IV_ARCH_T e_arch; + IV_SOC_T e_soc; + + WORD32 header_generated; + void *pv_codec_obj; + + UWORD32 u4_num_cores; + UWORD32 u4_pre_enc_me; + UWORD32 u4_pre_enc_ipe; + CHAR ac_ip_fname[STRLENGTH]; + CHAR ac_op_fname[STRLENGTH]; + CHAR ac_recon_fname[STRLENGTH]; + CHAR ac_nalu_info_csv_fname[STRLENGTH]; + CHAR ac_chksum_fname[STRLENGTH]; + CHAR ac_mb_info_fname[STRLENGTH]; + CHAR ac_pic_info_fname[STRLENGTH]; + + FILE *fp_ip; + FILE *fp_op; + FILE *fp_recon; + 
FILE *fp_nalu_info; + FILE *fp_chksum; + FILE *fp_psnr_ip; + FILE *fp_mb_info; + FILE *fp_pic_info; + FILE *fp_dump_op; + + UWORD32 u4_loopback; + UWORD32 u4_max_frame_rate; + UWORD32 u4_src_frame_rate; + UWORD32 u4_tgt_frame_rate; + UWORD32 u4_max_wd; + UWORD32 u4_max_ht; + UWORD32 u4_max_level; + + UWORD32 u4_strd; + + UWORD32 u4_wd; + UWORD32 u4_ht; + + UWORD32 u4_enc_wd; + UWORD32 u4_enc_ht; + + UWORD32 u4_psnr_enable; + + UWORD32 u4_enc_speed; + UWORD32 u4_me_speed; + UWORD32 u4_enable_fast_sad; + UWORD32 u4_enable_alt_ref; + UWORD32 u4_rc; + UWORD32 *pu4_max_bitrate; + UWORD32 *pu4_bitrate; + UWORD32 *pu4_i_qp, *pu4_i_qp_max, *pu4_i_qp_min; + UWORD32 *pu4_p_qp, *pu4_p_qp_max, *pu4_p_qp_min; + UWORD32 *pu4_b_qp, *pu4_b_qp_max, *pu4_b_qp_min; + UWORD32 u4_air; + UWORD32 u4_air_refresh_period; + UWORD32 u4_srch_rng_x; + UWORD32 u4_srch_rng_y; + UWORD32 u4_i_interval; + UWORD32 u4_idr_interval; + UWORD32 u4_b_frames; + UWORD32 u4_num_bframes; + UWORD32 u4_disable_deblock_level; + UWORD32 u4_hpel; + UWORD32 u4_qpel; + UWORD32 u4_enable_intra_4x4; + IV_PROFILE_T e_profile; + + UWORD32 u4_slice_mode; + UWORD32 u4_slice_param; + UWORD32 u4_entropy_coding_mode; + + void *pv_input_thread_handle; + void *pv_output_thread_handle; + void *pv_recon_thread_handle; + + isvce_ctl_getbufinfo_op_t s_get_buf_info_op; + input_buf_t as_input_buf[DEFAULT_MAX_INPUT_BUFS]; + output_buf_t as_output_buf[DEFAULT_MAX_OUTPUT_BUFS]; + recon_buf_t as_recon_buf[DEFAULT_MAX_RECON_BUFS]; + nalu_info_buf_t as_nalu_info_bufs[DEFAULT_MAX_NALU_INFO_BUFS]; + + DOUBLE adbl_psnr[3]; + UWORD32 u4_psnr_cnt; + UWORD8 *pu1_psnr_buf; + UWORD8 u4_psnr_buf_size; + + UWORD32 *pu4_vbv_buffer_delay; + + TIMER enc_start_time; + TIMER enc_last_time; + WORD32 avg_time; + + UWORD32 u4_sei_mdcv_params_present_flag; + UWORD32 au4_display_primaries_x[NUM_SEI_MDCV_PRIMARIES]; + UWORD32 au4_display_primaries_y[NUM_SEI_MDCV_PRIMARIES]; + UWORD32 u4_white_point_x; + UWORD32 u4_white_point_y; + UWORD32 
u4_max_display_mastering_luminance; + UWORD32 u4_min_display_mastering_luminance; + + UWORD32 u4_sei_cll_params_present_flag; + UWORD32 u4_max_content_light_level; + UWORD32 u4_max_pic_average_light_level; + + UWORD32 u4_sei_ave_params_present_flag; + UWORD32 u4_ambient_illuminance; + UWORD32 u4_ambient_light_x; + UWORD32 u4_ambient_light_y; + + UWORD32 u4_sei_ccv_params_present_flag; + UWORD32 u4_ccv_cancel_flag; + UWORD32 u4_ccv_persistence_flag; + UWORD32 u4_ccv_primaries_present_flag; + UWORD32 u4_ccv_min_luminance_value_present_flag; + UWORD32 u4_ccv_max_luminance_value_present_flag; + UWORD32 u4_ccv_avg_luminance_value_present_flag; + UWORD32 u4_ccv_reserved_zero_2bits; + WORD32 ai4_ccv_primaries_x[NUM_SEI_CCV_PRIMARIES]; + WORD32 ai4_ccv_primaries_y[NUM_SEI_CCV_PRIMARIES]; + UWORD32 u4_ccv_min_luminance_value; + UWORD32 u4_ccv_max_luminance_value; + UWORD32 u4_ccv_avg_luminance_value; + UWORD32 u4_use_default_vui; + + isvce_ctl_set_sei_mdcv_params_ip_t s_sei_mdcv_params; + isvce_ctl_set_sei_cll_params_ip_t s_sei_cll_params; + isvce_ctl_set_sei_ave_params_ip_t s_sei_ave_params; + + UWORD8 u1_num_temporal_layers; + UWORD8 u1_num_spatial_layers; + DOUBLE d_spatial_res_ratio; + +} app_ctxt_t; + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ +void codec_exit(CHAR *pc_err_message); +void allocate_input(app_ctxt_t *ps_app_ctxt); +void allocate_output(app_ctxt_t *ps_app_ctxt); +void allocate_recon(app_ctxt_t *ps_app_ctxt); + +IV_STATUS_T read_input(FILE *fp, iv_raw_buf_t *ps_raw_buf); +IV_STATUS_T write_recon(FILE *fp, iv_raw_buf_t *ps_raw_buf); +IV_STATUS_T write_output(FILE *fp, UWORD8 *pu1_buf, WORD32 num_bytes); + +IV_STATUS_T read_mb_info(app_ctxt_t *ps_app_ctxt, void *pv_mb_info); +IV_STATUS_T read_pic_info(app_ctxt_t *ps_app_ctxt, void *pv_pic_info); + +void *isvca_aligned_malloc(WORD32 alignment, WORD32 size); +void 
isvca_aligned_free(void *pv_buf); + +void free_input(app_ctxt_t *ps_app_ctxt); +void free_recon(app_ctxt_t *ps_app_ctxt); +void free_output(app_ctxt_t *ps_app_ctxt); + +void init_raw_buf_descr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_raw_buf, UWORD8 *pu1_buf, + IV_COLOR_FORMAT_T e_color_fmt); + +#ifndef MD5_DISABLE +void calc_md5_cksum(UWORD8 *pu1_inbuf, UWORD32 u4_stride, UWORD32 u4_width, UWORD32 u4_height, + UWORD8 *pu1_cksum_p); +#else +#define calc_md5_cksum(a, b, c, d, e) +#endif + +#endif diff --git a/test/svcenc/enc.cfg b/test/svcenc/enc.cfg new file mode 100644 index 0000000..ba62199 --- /dev/null +++ b/test/svcenc/enc.cfg @@ -0,0 +1,47 @@ +--input input_qvga.yuv +--output output.264 +--recon recon.yuv +--chksum chksum.md5 +--chksum_enable 0 +--recon_enable 0 +--input_chroma_format YUV_420P +--recon_chroma_format YUV_420P +--qp_i 24 +--qp_p 27 +--qp_b 29 +--qp_i_min 4 +--qp_i_max 49 +--qp_p_min 4 +--qp_p_max 49 +--qp_b_min 4 +--qp_b_max 49 +--max_wd 1920 +--max_ht 1080 +--psnr 0 +--slice 0 +--slice_param 0 +--num_frames -1 +--search_range_x 16 +--search_range_y 16 +--width 320 +--height 240 +--src_framerate 30 +--tgt_framerate 30 +--num_cores 4 +--rc 2 +--bitrate 256000 +--vbv_delay 1000 +--disable_deblock_level 0 +--intra_4x4_enable 1 +--i_interval 1000 +--me_speed 100 +--hpel 1 +--fast_sad 0 +--speed NORMAL +--max_level 41 +--idr_interval 1000 +--entropy 0 +--bframes 0 +--adaptive_intra_refresh 0 +--air_refresh_period 30 + diff --git a/test/svcenc/input.c b/test/svcenc/input.c new file mode 100644 index 0000000..47ed17a --- /dev/null +++ b/test/svcenc/input.c @@ -0,0 +1,300 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvce.h" +#include "app.h" + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Macros */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +IV_STATUS_T read_pic_info(app_ctxt_t *ps_app_ctxt, void *pv_pic_info) +{ + IV_STATUS_T ret = IV_SUCCESS; + WORD32 size, bytes; + + switch(ps_app_ctxt->u4_pic_info_type) + { + case 1: + size = sizeof(isvce_pic_info1_t); + ps_app_ctxt->u4_pic_info_size = sizeof(isvce_pic_info1_t); + break; + case 2: + size = sizeof(isvce_pic_info2_t); + ps_app_ctxt->u4_pic_info_size = sizeof(isvce_pic_info2_t); + break; + default: + size = 0; + break; + } + + bytes = 
(WORD32) fread(pv_pic_info, 1, size, ps_app_ctxt->fp_pic_info); + if(bytes != size) ret = IV_FAIL; + + return ret; +} + +IV_STATUS_T read_mb_info(app_ctxt_t *ps_app_ctxt, void *pv_mb_info) +{ + IV_STATUS_T ret = IV_SUCCESS; + WORD32 num_mbs; + WORD32 size; + WORD32 bytes; + + num_mbs = ALIGN16(ps_app_ctxt->u4_wd) * ALIGN16(ps_app_ctxt->u4_ht); + num_mbs /= 256; + + switch(ps_app_ctxt->u4_mb_info_type) + { + case 1: + size = sizeof(isvce_mb_info1_t) * num_mbs; + ps_app_ctxt->u4_mb_info_size = sizeof(isvce_mb_info1_t); + break; + case 2: + size = sizeof(isvce_mb_info2_t) * num_mbs; + ps_app_ctxt->u4_mb_info_size = sizeof(isvce_mb_info2_t); + break; + case 3: + size = sizeof(isvce_mb_info3_t) * num_mbs; + ps_app_ctxt->u4_mb_info_size = sizeof(isvce_mb_info3_t); + break; + case 4: + size = sizeof(isvce_mb_info4_t) * num_mbs; + ps_app_ctxt->u4_mb_info_size = sizeof(isvce_mb_info4_t); + break; + default: + size = 0; + break; + } + + bytes = (WORD32) fread(pv_mb_info, 1, size, ps_app_ctxt->fp_mb_info); + if(bytes != size) ret = IV_FAIL; + + return ret; +} + +IV_STATUS_T read_input(FILE *fp, iv_raw_buf_t *ps_raw_buf) +{ + WORD32 bytes; + WORD32 wd, ht, strd; + UWORD8 *pu1_buf; + WORD32 i; + WORD32 comp; + WORD32 num_comp; + + if(IV_YUV_422ILE == ps_raw_buf->e_color_fmt) + { + wd = ps_raw_buf->au4_wd[0]; + ht = ps_raw_buf->au4_ht[0]; + strd = ps_raw_buf->au4_strd[0]; + pu1_buf = ps_raw_buf->apv_bufs[0]; + + for(i = 0; i < ht; i++) + { + bytes = (WORD32) fread(pu1_buf, sizeof(UWORD8), wd, fp); + if(bytes != wd) + { + return (IV_FAIL); + } + pu1_buf += strd; + } + } + else + { + num_comp = 2; + + if(IV_YUV_420P == ps_raw_buf->e_color_fmt) num_comp = 3; + + for(comp = 0; comp < num_comp; comp++) + { + wd = ps_raw_buf->au4_wd[comp]; + ht = ps_raw_buf->au4_ht[comp]; + strd = ps_raw_buf->au4_strd[comp]; + pu1_buf = ps_raw_buf->apv_bufs[comp]; + + for(i = 0; i < ht; i++) + { + bytes = (WORD32) fread(pu1_buf, sizeof(UWORD8), wd, fp); + if(bytes != wd) + { + return (IV_FAIL); + } + 
pu1_buf += strd; + } + } + } + return IV_SUCCESS; +} + +IV_STATUS_T dump_input(FILE *fp, iv_raw_buf_t *ps_raw_buf) +{ + WORD32 bytes; + WORD32 wd, ht, strd; + UWORD8 *pu1_buf; + WORD32 i; + WORD32 comp; + WORD32 num_comp; + + if(IV_YUV_422ILE == ps_raw_buf->e_color_fmt) + { + wd = ps_raw_buf->au4_wd[0]; + ht = ps_raw_buf->au4_ht[0]; + strd = ps_raw_buf->au4_strd[0]; + pu1_buf = ps_raw_buf->apv_bufs[0]; + + for(i = 0; i < ht; i++) + { + bytes = (WORD32) fwrite(pu1_buf, sizeof(UWORD8), wd, fp); + if(bytes != wd) + { + return (IV_FAIL); + } + pu1_buf += strd; + } + } + else + { + num_comp = 2; + + if(IV_YUV_420P == ps_raw_buf->e_color_fmt) num_comp = 3; + + for(comp = 0; comp < num_comp; comp++) + { + wd = ps_raw_buf->au4_wd[comp]; + ht = ps_raw_buf->au4_ht[comp]; + strd = ps_raw_buf->au4_strd[comp]; + pu1_buf = ps_raw_buf->apv_bufs[comp]; + + for(i = 0; i < ht; i++) + { + bytes = (WORD32) fwrite(pu1_buf, sizeof(UWORD8), wd, fp); + if(bytes != wd) + { + return (IV_FAIL); + } + pu1_buf += strd; + } + } + } + return IV_SUCCESS; +} + +void allocate_input(app_ctxt_t *ps_app_ctxt) +{ + WORD32 num_bufs; + WORD32 pic_size; + WORD32 luma_size; + WORD32 chroma_size; + WORD32 num_mbs; + WORD32 i; + UWORD8 *pu1_buf[3]; + + isvce_ctl_getbufinfo_op_t *ps_get_buf_info_op = &ps_app_ctxt->s_get_buf_info_op; + + num_bufs = MAX(DEFAULT_NUM_INPUT_BUFS, ps_get_buf_info_op->s_ive_op.u4_min_inp_bufs); + num_bufs = MIN(DEFAULT_MAX_INPUT_BUFS, num_bufs); + + /* Size of buffer */ + luma_size = ps_app_ctxt->u4_wd * ps_app_ctxt->u4_ht; + chroma_size = luma_size >> 1; + pic_size = luma_size + chroma_size; + + num_mbs = ALIGN16(ps_app_ctxt->u4_max_wd) * ALIGN16(ps_app_ctxt->u4_max_ht); + num_mbs /= 256; + + /* Memset the input buffer array to set is_free to 0 */ + memset(ps_app_ctxt->as_input_buf, 0, sizeof(input_buf_t) * DEFAULT_MAX_INPUT_BUFS); + + for(i = 0; i < num_bufs; i++) + { + pu1_buf[0] = (UWORD8 *) isvca_aligned_malloc(16, pic_size); + if(NULL == pu1_buf[0]) + { + CHAR 
ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, + "Allocation failed for input buffer of size %d\n", pic_size); + codec_exit(ac_error); + } + ps_app_ctxt->as_input_buf[i].pu1_buf = pu1_buf[0]; + + pu1_buf[0] = (UWORD8 *) isvca_aligned_malloc(16, num_mbs * sizeof(isvce_api_mb_info_t)); + if(NULL == pu1_buf[0]) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, + "Allocation failed for mb info buffer of size %d\n", + (WORD32) (num_mbs * sizeof(isvce_api_mb_info_t))); + codec_exit(ac_error); + } + ps_app_ctxt->as_input_buf[i].pv_mb_info = pu1_buf[0]; + pu1_buf[0] = (UWORD8 *) isvca_aligned_malloc(16, sizeof(isvce_pic_info2_t)); + if(NULL == pu1_buf[0]) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, + "Allocation failed for pic info buffer of size %d\n", + (WORD32) sizeof(isvce_pic_info2_t)); + codec_exit(ac_error); + } + ps_app_ctxt->as_input_buf[i].pv_pic_info = pu1_buf[0]; + ps_app_ctxt->as_input_buf[i].u4_buf_size = pic_size; + ps_app_ctxt->as_input_buf[i].u4_is_free = 1; + } + return; +} + +void free_input(app_ctxt_t *ps_app_ctxt) +{ + WORD32 num_bufs; + WORD32 i; + + num_bufs = MAX(DEFAULT_NUM_INPUT_BUFS, ps_app_ctxt->s_get_buf_info_op.s_ive_op.u4_min_inp_bufs); + num_bufs = MIN(DEFAULT_MAX_INPUT_BUFS, num_bufs); + + for(i = 0; i < num_bufs; i++) + { + isvca_aligned_free(ps_app_ctxt->as_input_buf[i].pu1_buf); + isvca_aligned_free(ps_app_ctxt->as_input_buf[i].pv_mb_info); + isvca_aligned_free(ps_app_ctxt->as_input_buf[i].pv_pic_info); + } + return; +} diff --git a/test/svcenc/main.c b/test/svcenc/main.c new file mode 100644 index 0000000..db53278 --- /dev/null +++ b/test/svcenc/main.c @@ -0,0 +1,3253 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +#include +#include +#include +#include +#include +#include + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvce.h" +#include "app.h" +#include "psnr.h" + +#ifndef MD5_DISABLE +void calc_md5_cksum(UWORD8 *pu1_inbuf, UWORD32 u4_stride, UWORD32 u4_width, UWORD32 u4_height, + UWORD8 *pu1_cksum_p); +#else +#define calc_md5_cksum(a, b, c, d, e) +#endif + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ +typedef enum +{ + INVALID, + HELP, + INPUT_FILE, + OUTPUT_FILE, + RECON_FILE, + RECON_ENABLE, + NALU_INFO_EXPORT_ENABLE, + NALU_INFO_CSV, + CHKSUM_ENABLE, + CHKSUM_FILE, + INPUT_CHROMA_FORMAT, + RECON_CHROMA_FORMAT, + MAX_WD, + MAX_HT, + WD, + HT, + MAX_LEVEL, + ENC_SPEED, + ME_SPEED, + START_FRM, + NUM_FRMS, + MAX_FRAMERATE, + SRC_FRAMERATE, + TGT_FRAMERATE, + RC, + MAX_BITRATE, + BITRATE, + I_QP, + P_QP, + B_QP, + I_QP_MAX, + P_QP_MAX, + B_QP_MAX, + I_QP_MIN, + P_QP_MIN, + B_QP_MIN, + ENTROPY, + AIR, + AIR_REFRESH_PERIOD, + ARCH, + SOC, + NUMCORES, + PRE_ENC_ME, + PRE_ENC_IPE, + HPEL, + QPEL, + SRCH_RNG_X, + SRCH_RNG_Y, + I_INTERVAL, + IDR_INTERVAL, + B_FRMS, + NUM_B_FRMS, + DISABLE_DBLK, + PROFILE, + FAST_SAD, + ALT_REF, + DISABLE_DEBLOCK_LEVEL, + PSNR, + SLICE_MODE, + SLICE_PARAM, + CONFIG, + LOOPBACK, + VBV_DELAY, + 
INTRA_4x4_ENABLE, + MB_INFO_FILE, + MB_INFO_TYPE, + PIC_INFO_FILE, + PIC_INFO_TYPE, + DISABLE_VUI, + NUM_TEMPORAL_LAYERS, + NUM_SPATIAL_LAYERS, + SPATIAL_RES_RATIO, +} ARGUMENT_T; + +typedef struct +{ + CHAR argument_shortname[8]; + CHAR argument_name[128]; + ARGUMENT_T argument; + CHAR description[512]; +} argument_t; + +static const argument_t argument_mapping[] = { /* short name ("--" = none), long name, argument ID, help text */ + {"--", "--help", HELP, "Print this help\n"}, + {"-i", "--input", INPUT_FILE, "Input file\n"}, + {"-o", "--output", OUTPUT_FILE, "Output file\n"}, + {"--", "--recon_enable", RECON_ENABLE, "Recon enable flag\n"}, + {"--", "--nalu_info_export", NALU_INFO_EXPORT_ENABLE, "Enable NALU Info export\n"}, + {"--", "--nalu_info_csv", NALU_INFO_CSV, "Path to NALU Info CSV File\n"}, + {"-r", "--recon", RECON_FILE, "Recon file \n"}, + {"--", "--input_chroma_format", INPUT_CHROMA_FORMAT, + "Input Chroma format Supported values YUV_420P, YUV_420SP_UV, " + "YUV_420SP_VU\n"}, + {"--", "--recon_chroma_format", RECON_CHROMA_FORMAT, + "Recon Chroma format Supported values YUV_420P, YUV_420SP_UV, " + "YUV_420SP_VU\n"}, + {"-w", "--width", WD, "Width of input file\n"}, + {"-h", "--height", HT, "Height of input file\n"}, + {"--", "--start_frame", START_FRM, "Starting frame number\n"}, + {"-f", "--num_frames", NUM_FRMS, "Number of frames to be encoded\n"}, + {"--", "--rc", RC, + "Rate control mode 0: Constant Qp, 1: Storage, 2: CBR non low delay, 3: " + "CBR low delay \n"}, + {"--", "--max_framerate", MAX_FRAMERATE, "Maximum frame rate \n"}, + {"--", "--tgt_framerate", TGT_FRAMERATE, "Target frame rate \n"}, + {"--", "--src_framerate", SRC_FRAMERATE, "Source frame rate \n"}, + {"--", "--i_interval", I_INTERVAL, "Intra frame interval \n"}, + {"--", "--idr_interval", IDR_INTERVAL, "IDR frame interval \n"}, + {"--", "--bframes", NUM_B_FRMS, "Maximum number of consecutive B frames \n"}, + {"--", "--speed", ENC_SPEED, "Encoder speed preset. Supported values CONFIG, SLOWEST, NORMAL, FAST, HIGH_SPEED, FASTEST\n"}, + {"--", "--me_speed", ME_SPEED, "Motion estimation speed preset 0 
(slowest) and 100 (fastest)\n"}, + {"--", "--fast_sad", FAST_SAD, " Flag for faster sad execution\n"}, + {"--", "--alt_ref", ALT_REF, "Flag to enable alternate reference frames\n"}, + {"--", "--hpel", HPEL, "Flag to enable/disable Half pel estimation \n"}, + {"--", "--qpel", QPEL, "Flag to enable/disable Quarter pel estimation \n"}, + {"--", "--disable_deblock_level", DISABLE_DEBLOCK_LEVEL, + "Disable deblock level - 0 : Enables deblock completely, 1: enables for I " + "and 8th frame , 2: Enables for I only, 3 : disables completely\n"}, + {"--", "--search_range_x", SRCH_RNG_X, "Search range for X \n"}, + {"--", "--search_range_y", SRCH_RNG_Y, "Search range for Y \n"}, + {"--", "--psnr", PSNR, "Enable PSNR computation (Disable while benchmarking performance) \n"}, + {"--", "--pre_enc_me", PRE_ENC_ME, "Flag to enable/disable Pre Enc Motion Estimation\n"}, + {"--", "--pre_enc_ipe", PRE_ENC_IPE, + "Flag to enable/disable Pre Enc Intra prediction Estimation\n"}, + {"-n", "--num_cores", NUMCORES, "Number of cores to be used\n"}, + {"--", "--adaptive_intra_refresh", AIR, "Adaptive Intra Refresh enable/disable\n"}, + {"--", "--air_refresh_period", AIR_REFRESH_PERIOD, "adaptive intra refresh period\n"}, + {"--", "--slice", SLICE_MODE, + "Slice mode- 0 :No slice, 1: Bytes per slice, 2: MB/CTB per slice \n"}, + {"--", "--slice_param", SLICE_PARAM, + "Slice param value based on slice mode. Slice mode of 1 implies number of " + "bytes per slice, 2 implies number of MBs/CTBs, for 0 value is neglected " + "\n"}, + {"--", "--max_wd", MAX_WD, "Maximum width (Default: 1920) \n"}, + {"--", "--max_ht", MAX_HT, "Maximum height (Default: 1088)\n"}, + {"--", "--max_level", MAX_LEVEL, "Maximum Level (Default: 50)\n"}, + {"--", "--arch", ARCH, + "Set Architecture. Supported values ARCH_GENERIC, ARM_A9Q, ARM_A7, " + "ARM_A5, " + "ARM_NEONINTR, X86_SSSE3, X86_SSE42 \n"}, + {"--", "--soc", SOC, "Set SOC. 
Supported values GENERIC, HISI_37X \n"}, + {"--", "--chksum", CHKSUM_FILE, "Recon MD5 checksum output file\n"}, + {"--", "--chksum_enable", CHKSUM_ENABLE, "Enable saving MD5 checksum of recon data\n"}, + {"-c", "--config", CONFIG, "config file (Default: enc.cfg)\n"}, + {"--", "--loopback", LOOPBACK, "Enable encoding in a loop\n"}, + {"--", "--profile", PROFILE, "Profile mode: Supported values BASE, MAIN, HIGH\n"}, + {"--", "--max_bitrate", MAX_BITRATE, "Max bitrate\n"}, + {"--", "--bitrate", BITRATE, "Target bitrate\n"}, + {"--", "--qp_i", I_QP, "QP for I frames\n"}, + {"--", "--qp_p", P_QP, "QP for P frames\n"}, + {"--", "--qp_b", B_QP, "QP for B frames\n"}, + {"--", "--qp_i_max", I_QP_MAX, "Max QP for I frames\n"}, + {"--", "--qp_p_max", P_QP_MAX, "Max QP for P frames\n"}, + {"--", "--qp_b_max", B_QP_MAX, "Max QP for B frames\n"}, + {"--", "--qp_i_min", I_QP_MIN, "Min QP for I frames\n"}, + {"--", "--qp_p_min", P_QP_MIN, "Min QP for P frames\n"}, + {"--", "--qp_b_min", B_QP_MIN, "Min QP for B frames\n"}, + {"--", "--entropy", ENTROPY, "Entropy coding mode(0: CAVLC or 1: CABAC)\n"}, + {"--", "--vbv_delay", VBV_DELAY, "VBV buffer delay\n"}, + {"-i4", "--intra_4x4_enable", INTRA_4x4_ENABLE, "Intra 4x4 enable \n"}, + {"--", "--mb_info_file", MB_INFO_FILE, "MB info file\n"}, + {"--", "--mb_info_type", MB_INFO_TYPE, "MB info type\n"}, + {"--", "--pic_info_file", PIC_INFO_FILE, "Pic info file\n"}, + {"--", "--pic_info_type", PIC_INFO_TYPE, "Pic info type\n"}, + {"--", "--num_temporal_layers", NUM_TEMPORAL_LAYERS, "SVC Parameter : Num temporal layers\n"}, + {"--", "--num_spatial_layers", NUM_SPATIAL_LAYERS, "SVC Parameter : Num spatial layers\n"}, + {"--", "--spatial_res_ratio", SPATIAL_RES_RATIO, + "SVC Parameter : Resolution ratio between successive spatial layers\n"}, + {"--", "--disable_vui", DISABLE_VUI, "disable vui\n"}, +}; + +void *isvca_aligned_malloc(WORD32 alignment, WORD32 size) +{ + void *buf = NULL; + + if(0 != posix_memalign(&buf, alignment, size)) + { + return NULL; + 
} + return buf; +} + +void isvca_aligned_free(void *pv_buf) { free(pv_buf); } + +/*****************************************************************************/ +/* */ +/* Function Name : codec_exit */ +/* */ +/* Description : handles unrecoverable errors */ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Prints error message to console and exits. */ +/* Outputs : Error message to the console */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 07 06 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +void codec_exit(CHAR *pc_err_message) +{ + printf("%s\n", pc_err_message); + exit(-1); +} + +/*****************************************************************************/ +/* */ +/* Function Name : codec_exit */ +/* */ +/* Description : handles unrecoverable errors */ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Prints error message to console and exits. 
*/ +/* Outputs : Error mesage to the console */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 07 06 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +IV_COLOR_FORMAT_T get_chroma_fmt(CHAR *value) +{ + IV_COLOR_FORMAT_T e_chroma_format; + if((strcmp(value, "YUV_420P")) == 0) + e_chroma_format = IV_YUV_420P; + else if((strcmp(value, "YUV_422ILE")) == 0) + e_chroma_format = IV_YUV_422ILE; + else if((strcmp(value, "RGB_565")) == 0) + e_chroma_format = IV_RGB_565; + else if((strcmp(value, "RGBA_8888")) == 0) + e_chroma_format = IV_RGBA_8888; + else if((strcmp(value, "YUV_420SP_UV")) == 0) + e_chroma_format = IV_YUV_420SP_UV; + else if((strcmp(value, "YUV_420SP_VU")) == 0) + e_chroma_format = IV_YUV_420SP_VU; + else + { + printf("\nInvalid colour format setting it to IV_YUV_420P\n"); + e_chroma_format = IV_YUV_420P; + } + return e_chroma_format; +} + +/*****************************************************************************/ +/* */ +/* Function Name : codec_exit */ +/* */ +/* Description : handles unrecoverable errors */ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Prints error message to console and exits. 
*/ +/* Outputs : Error mesage to the console */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 07 06 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +IVE_SPEED_CONFIG get_speed_preset(CHAR *value) +{ + IVE_SPEED_CONFIG e_enc_speed_preset; + if((strcmp(value, "CONFIG")) == 0) + e_enc_speed_preset = IVE_CONFIG; + else if((strcmp(value, "SLOWEST")) == 0) + e_enc_speed_preset = IVE_SLOWEST; + else if((strcmp(value, "NORMAL")) == 0) + e_enc_speed_preset = IVE_NORMAL; + else if((strcmp(value, "FAST")) == 0) + e_enc_speed_preset = IVE_FAST; + else if((strcmp(value, "HIGH_SPEED")) == 0) + e_enc_speed_preset = IVE_HIGH_SPEED; + else if((strcmp(value, "FASTEST")) == 0) + e_enc_speed_preset = IVE_FASTEST; + else + { + printf("\nInvalid speed preset, setting it to IVE_FASTEST\n"); + e_enc_speed_preset = IVE_FASTEST; + } + return e_enc_speed_preset; +} + +/*****************************************************************************/ +/* */ +/* Function Name : print_usage */ +/* */ +/* Description : Prints argument format */ +/* */ +/* */ +/* Inputs : */ +/* Globals : */ +/* Processing : Prints argument format */ +/* */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void print_usage(void) +{ + WORD32 i = 0; + WORD32 num_entries = sizeof(argument_mapping) / sizeof(argument_t); + printf("\nUsage:\n"); + while(i < num_entries) + { + printf("%-32s\t %s", argument_mapping[i].argument_name, argument_mapping[i].description); + i++; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : get_argument */ +/* */ +/* Description : Gets argument for a 
given string */ +/* */ +/* */ +/* Inputs : name : argument name (long or short form) to look up */ +/* Globals : argument_mapping */ +/* Processing : Searches the given string in the array and returns */ +/* appropriate argument ID */ +/* */ +/* Outputs : Argument ID */ +/* Returns : Argument ID, or INVALID when no entry matches */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ +ARGUMENT_T get_argument(CHAR *name) +{ + WORD32 i = 0; + WORD32 num_entries = sizeof(argument_mapping) / sizeof(argument_t); + while(i < num_entries) + { + /* Match the long name, or the short name; a short name of "--" is never matched */ + if((0 == strcmp(argument_mapping[i].argument_name, name)) || + ((0 == strcmp(argument_mapping[i].argument_shortname, name)) && + (0 != strcmp(argument_mapping[i].argument_shortname, "--")))) + { + return argument_mapping[i].argument; + } + i++; + } + return INVALID; +} + +/*****************************************************************************/ +/* */ +/* Function Name : parse_argument */ +/* */ +/* Description : Parses the value of one argument into the app context */ +/* */ +/* */ +/* Inputs : ps_app_ctxt : Application context */ +/* argument : Argument name (long or short form) */ +/* value : Argument value as a string */ +/* Globals : */ +/* Processing : Maps the name to an argument ID with get_argument() and */ +/* stores the parsed value in the matching context field. */ +/* HELP prints the usage text and exits. */ +/* */ +/* Outputs : Application context field selected by the argument */ +/* Returns : None */ +/* */ +/* Issues : String values are read with "%s" without a field width */ +/* NOTE(review): assumes value fits the destination buffer */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ +void parse_argument(app_ctxt_t *ps_app_ctxt, CHAR *argument, CHAR *value) +{ + ARGUMENT_T arg = get_argument(argument); + + switch(arg) + { + case HELP: + print_usage(); + exit(-1); + break; + case SLICE_MODE: + sscanf(value, "%d", &ps_app_ctxt->u4_slice_mode); + break; + case SLICE_PARAM: + sscanf(value, "%d", &ps_app_ctxt->u4_slice_param); + break; + case INPUT_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_ip_fname); + break; + + case OUTPUT_FILE: + sscanf(value, "%s", 
ps_app_ctxt->ac_op_fname); + break; + + case RECON_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_recon_fname); + break; + + case RECON_ENABLE: + sscanf(value, "%d", &ps_app_ctxt->u4_recon_enable); + break; + + case NALU_INFO_EXPORT_ENABLE: + sscanf(value, "%d", &ps_app_ctxt->u4_nalu_info_export_enable); + break; + + case NALU_INFO_CSV: + sscanf(value, "%s", ps_app_ctxt->ac_nalu_info_csv_fname); + break; + + case CHKSUM_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_chksum_fname); + break; + + case CHKSUM_ENABLE: + sscanf(value, "%d", &ps_app_ctxt->u4_chksum_enable); + break; + + case MB_INFO_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_mb_info_fname); + break; + + case MB_INFO_TYPE: + sscanf(value, "%d", &ps_app_ctxt->u4_mb_info_type); + break; + + case PIC_INFO_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_pic_info_fname); + break; + + case PIC_INFO_TYPE: + sscanf(value, "%d", &ps_app_ctxt->u4_pic_info_type); + break; + + case INPUT_CHROMA_FORMAT: + ps_app_ctxt->e_inp_color_fmt = get_chroma_fmt(value); + break; + + case RECON_CHROMA_FORMAT: + ps_app_ctxt->e_recon_color_fmt = get_chroma_fmt(value); + break; + + case MAX_WD: + sscanf(value, "%d", &ps_app_ctxt->u4_max_wd); + break; + + case MAX_HT: + sscanf(value, "%d", &ps_app_ctxt->u4_max_ht); + break; + + case WD: + sscanf(value, "%d", &ps_app_ctxt->u4_wd); + break; + + case HT: + sscanf(value, "%d", &ps_app_ctxt->u4_ht); + break; + + case MAX_LEVEL: + sscanf(value, "%d", &ps_app_ctxt->u4_max_level); + break; + + case ENC_SPEED: + ps_app_ctxt->u4_enc_speed = get_speed_preset(value); + break; + + case ME_SPEED: + sscanf(value, "%d", &ps_app_ctxt->u4_me_speed); + break; + + case START_FRM: + sscanf(value, "%d", &ps_app_ctxt->u4_start_frm); + break; + + case NUM_FRMS: + sscanf(value, "%d", &ps_app_ctxt->u4_max_num_frms); + break; + + case MAX_FRAMERATE: + sscanf(value, "%d", &ps_app_ctxt->u4_max_frame_rate); + if(ps_app_ctxt->u4_max_frame_rate <= 0) + ps_app_ctxt->u4_max_frame_rate = DEFAULT_MAX_FRAMERATE; + break; + + case 
SRC_FRAMERATE: + sscanf(value, "%d", &ps_app_ctxt->u4_src_frame_rate); + if(ps_app_ctxt->u4_src_frame_rate <= 0) + ps_app_ctxt->u4_src_frame_rate = DEFAULT_SRC_FRAME_RATE; + break; + + case TGT_FRAMERATE: + sscanf(value, "%d", &ps_app_ctxt->u4_tgt_frame_rate); + if(ps_app_ctxt->u4_tgt_frame_rate <= 0) + ps_app_ctxt->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE; + break; + + case RC: + sscanf(value, "%d", &ps_app_ctxt->u4_rc); + break; + + case ENTROPY: + sscanf(value, "%d", &ps_app_ctxt->u4_entropy_coding_mode); + break; + + case AIR: + sscanf(value, "%d", &ps_app_ctxt->u4_air); + break; + + case ARCH: + if((strcmp(value, "ARCH_GENERIC")) == 0) + ps_app_ctxt->e_arch = ARCH_X86_GENERIC; + else if((strcmp(value, "ARM_A9Q")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_A9Q; + else if((strcmp(value, "ARM_A7")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_A7; + else if((strcmp(value, "ARM_A5")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_A5; + else if((strcmp(value, "ARM_NEONINTR")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_NEONINTR; + else if((strcmp(value, "X86_SSSE3")) == 0) + ps_app_ctxt->e_arch = ARCH_X86_SSSE3; + else if((strcmp(value, "X86_SSE42")) == 0) + ps_app_ctxt->e_arch = ARCH_X86_SSE42; + else if((strcmp(value, "ARM_A53")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_A53; + else if((strcmp(value, "ARM_A57")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_A57; + else if((strcmp(value, "ARM_V8_NEON")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_V8_NEON; + else + { + printf("\nInvalid Arch. 
Setting it to ARM_A9Q\n"); + ps_app_ctxt->e_arch = ARCH_ARM_A9Q; + } + + break; + case SOC: + if((strcmp(value, "GENERIC")) == 0) + ps_app_ctxt->e_soc = SOC_GENERIC; + else if((strcmp(value, "HISI_37X")) == 0) + ps_app_ctxt->e_soc = SOC_HISI_37X; + else + { + ps_app_ctxt->e_soc = SOC_GENERIC; + } + break; + + case NUMCORES: + sscanf(value, "%d", &ps_app_ctxt->u4_num_cores); + break; + + case LOOPBACK: + sscanf(value, "%d", &ps_app_ctxt->u4_loopback); + break; + + case PRE_ENC_ME: + sscanf(value, "%d", &ps_app_ctxt->u4_pre_enc_me); + break; + + case PRE_ENC_IPE: + sscanf(value, "%d", &ps_app_ctxt->u4_pre_enc_ipe); + break; + + case HPEL: + sscanf(value, "%d", &ps_app_ctxt->u4_hpel); + break; + + case QPEL: + sscanf(value, "%d", &ps_app_ctxt->u4_qpel); + break; + + case SRCH_RNG_X: + sscanf(value, "%d", &ps_app_ctxt->u4_srch_rng_x); + break; + + case SRCH_RNG_Y: + sscanf(value, "%d", &ps_app_ctxt->u4_srch_rng_y); + break; + + case I_INTERVAL: + sscanf(value, "%d", &ps_app_ctxt->u4_i_interval); + break; + + case IDR_INTERVAL: + sscanf(value, "%d", &ps_app_ctxt->u4_idr_interval); + break; + + case NUM_B_FRMS: + sscanf(value, "%d", &ps_app_ctxt->u4_num_bframes); + break; + + case DISABLE_DEBLOCK_LEVEL: + sscanf(value, "%d", &ps_app_ctxt->u4_disable_deblock_level); + break; + + case FAST_SAD: + sscanf(value, "%d", &ps_app_ctxt->u4_enable_fast_sad); + break; + + case ALT_REF: + sscanf(value, "%d", &ps_app_ctxt->u4_enable_alt_ref); + break; + + case AIR_REFRESH_PERIOD: + sscanf(value, "%d", &ps_app_ctxt->u4_air_refresh_period); + break; + + case PROFILE: + if((strcmp(value, "BASE")) == 0) + ps_app_ctxt->e_profile = IV_PROFILE_BASE; + else if((strcmp(value, "MAIN")) == 0) + ps_app_ctxt->e_profile = IV_PROFILE_MAIN; + else if((strcmp(value, "HIGH")) == 0) + ps_app_ctxt->e_profile = IV_PROFILE_HIGH; + else + { + printf("\nInvalid profile. 
Setting it to BASE\n"); + ps_app_ctxt->e_profile = IV_PROFILE_BASE; + } + break; + + case PSNR: + sscanf(value, "%d", &ps_app_ctxt->u4_psnr_enable); + break; + + case INTRA_4x4_ENABLE: + sscanf(value, "%d", &ps_app_ctxt->u4_enable_intra_4x4); + break; + + case DISABLE_VUI: + sscanf(value, "%d", &ps_app_ctxt->u4_use_default_vui); + break; + + case NUM_TEMPORAL_LAYERS: + { + sscanf(value, "%hhu", &ps_app_ctxt->u1_num_temporal_layers); + break; + } + + case NUM_SPATIAL_LAYERS: + { + sscanf(value, "%hhu", &ps_app_ctxt->u1_num_spatial_layers); + break; + } + + case SPATIAL_RES_RATIO: + { + sscanf(value, "%lf", &ps_app_ctxt->d_spatial_res_ratio); + break; + } + + case MAX_BITRATE: + case BITRATE: + case I_QP: + case I_QP_MAX: + case I_QP_MIN: + case P_QP: + case P_QP_MAX: + case P_QP_MIN: + case B_QP: + case B_QP_MAX: + case B_QP_MIN: + case VBV_DELAY: + break; + + case INVALID: + default: + printf("Ignoring argument : %s\n", argument); + break; + } +} + +void parse_rc_argument(app_ctxt_t *ps_app_ctxt, CHAR *argument, CHAR *value) +{ +#define ITERATE_TO_NEXT_ARG() \ + while(*value != ',' && *value != '-' && *value != '\0') \ + { \ + value++; \ + } \ + if((*value == '-' || *value == '\0') && i + 1 < u1_num_spatial_layers) break; \ + value++; + + ARGUMENT_T arg; + int i = 0; + UWORD8 u1_num_spatial_layers = ps_app_ctxt->u1_num_spatial_layers; + + arg = get_argument(argument); + switch(arg) + { + case MAX_BITRATE: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_max_bitrate[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case BITRATE: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_bitrate[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case I_QP: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_i_qp[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case I_QP_MAX: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", 
&ps_app_ctxt->pu4_i_qp_max[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case I_QP_MIN: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_i_qp_min[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case P_QP: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_p_qp[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case P_QP_MAX: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_p_qp_max[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case P_QP_MIN: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_p_qp_min[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case B_QP: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_b_qp[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case B_QP_MAX: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_b_qp_max[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case B_QP_MIN: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_b_qp_min[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + case VBV_DELAY: + for(i = 0; i < u1_num_spatial_layers; i++) + { + sscanf(value, "%d", &ps_app_ctxt->pu4_vbv_buffer_delay[i]); + ITERATE_TO_NEXT_ARG(); + } + break; + + default: + break; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : read_cfg_file */ +/* */ +/* Description : Reads arguments from a configuration file */ +/* */ +/* */ +/* Inputs : ps_app_ctxt : Application context */ +/* fp_cfg_file : Configuration file handle */ +/* Globals : */ +/* Processing : Parses the arguments and fills in the application context*/ +/* */ +/* Outputs : Arguments parsed */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ 
+/*****************************************************************************/ +void read_cfg_file(app_ctxt_t *ps_app_ctxt, FILE *fp_cfg) +{ + CHAR line[STRLENGTH]; + CHAR description[STRLENGTH]; + CHAR value[STRLENGTH]; + CHAR argument[STRLENGTH]; + + while(0 == (feof(fp_cfg))) + { + int ret; + line[0] = '\0'; + if(NULL == fgets(line, sizeof(line), fp_cfg)) break; + argument[0] = '\0'; + /* Split the line into argument name, value and an optional description */ + ret = sscanf(line, "%s %s %s", argument, value, description); + /* Skip lines that do not yield at least a name and a value */ + if(ret < 2) continue; + + parse_argument(ps_app_ctxt, argument, value); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : read_cfg_file_rc_params */ +/* */ +/* Description : Reads RC specific arguments from a configuration file */ +/* */ +/* */ +/* Inputs : ps_app_ctxt : Application context */ +/* fp_cfg : Configuration file handle */ +/* Globals : */ +/* Processing : Reads the file line by line and passes each name/value */ +/* pair to parse_rc_argument() */ +/* */ +/* Outputs : Arguments parsed */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ +void read_cfg_file_rc_params(app_ctxt_t *ps_app_ctxt, FILE *fp_cfg) +{ + CHAR line[STRLENGTH]; + CHAR description[STRLENGTH]; + CHAR value[STRLENGTH]; + CHAR argument[STRLENGTH]; + + while(0 == (feof(fp_cfg))) + { + int ret; + line[0] = '\0'; + if(NULL == fgets(line, sizeof(line), fp_cfg)) break; + argument[0] = '\0'; + /* Split the line into argument name, value and an optional description */ + ret = sscanf(line, "%s %s %s", argument, value, description); + /* Skip lines that do not yield at least a name and a value */ + if(ret < 2) continue; + + parse_rc_argument(ps_app_ctxt, argument, value); + } +} + +/* Prints the usage text, then passes the error message to codec_exit() */ +void invalid_argument_exit(CHAR *pc_err_message) +{ + print_usage(); + codec_exit(pc_err_message); +} + +/* Checks the parsed parameters; each failed check calls invalid_argument_exit() */ +void validate_params(app_ctxt_t *ps_app_ctxt) +{ + CHAR ac_error[STRLENGTH]; + + 
if(ps_app_ctxt->ac_ip_fname[0] == '\0') + { + invalid_argument_exit("Specify input file"); + } + if(ps_app_ctxt->ac_op_fname[0] == '\0') + { + invalid_argument_exit("Specify output file"); + } + if((1 == ps_app_ctxt->u4_recon_enable) && (ps_app_ctxt->ac_recon_fname[0] == '\0')) + { + invalid_argument_exit("Specify recon file"); + } + if((1 == ps_app_ctxt->u4_nalu_info_export_enable) && + (ps_app_ctxt->ac_nalu_info_csv_fname[0] == '\0')) + { + invalid_argument_exit("Specify NALU Info CSV File"); + } + if((1 == ps_app_ctxt->u4_chksum_enable) && (ps_app_ctxt->ac_chksum_fname[0] == '\0')) + { + invalid_argument_exit("Specify checksum file"); + } + if(0 >= (WORD32) ps_app_ctxt->u4_wd) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Invalid width: %d", ps_app_ctxt->u4_wd); + invalid_argument_exit(ac_error); + } + if(0 >= (WORD32) ps_app_ctxt->u4_ht) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Invalid height: %d", ps_app_ctxt->u4_ht); + invalid_argument_exit(ac_error); + } + + if(0 == (WORD32) ps_app_ctxt->u4_max_num_frms) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Invalid number of frames to be encoded: %d", + ps_app_ctxt->u4_max_num_frms); + invalid_argument_exit(ac_error); + } + if((0 != (WORD32) ps_app_ctxt->u4_entropy_coding_mode) && + (1 != (WORD32) ps_app_ctxt->u4_entropy_coding_mode)) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Invalid entropy codeing mode: %d", + ps_app_ctxt->u4_entropy_coding_mode); + invalid_argument_exit(ac_error); + } +} + +void init_default_params(app_ctxt_t *ps_app_ctxt) +{ + ps_app_ctxt->ps_enc = NULL; + ps_app_ctxt->ps_mem_rec = NULL; + ps_app_ctxt->u4_num_mem_rec = DEFAULT_MEM_REC_CNT; + ps_app_ctxt->u4_recon_enable = DEFAULT_RECON_ENABLE; + ps_app_ctxt->u4_nalu_info_export_enable = DEFAULT_NALU_INFO_EXPORT_ENABLE; + ps_app_ctxt->u4_chksum_enable = DEFAULT_CHKSUM_ENABLE; + ps_app_ctxt->u4_mb_info_type = 0; + ps_app_ctxt->u4_pic_info_type = 0; + ps_app_ctxt->u4_mb_info_size = 0; + ps_app_ctxt->u4_pic_info_size = 0; + 
ps_app_ctxt->u4_start_frm = DEFAULT_START_FRM; + ps_app_ctxt->u4_max_num_frms = DEFAULT_NUM_FRMS; + ps_app_ctxt->avg_time = 0; + ps_app_ctxt->u4_pics_cnt = 0; + ps_app_ctxt->e_inp_color_fmt = DEFAULT_INP_COLOR_FMT; + ps_app_ctxt->e_recon_color_fmt = DEFAULT_RECON_COLOR_FMT; + ps_app_ctxt->e_arch = ARCH_X86_SSE42; + ps_app_ctxt->e_soc = SOC_GENERIC; + ps_app_ctxt->header_generated = 0; + ps_app_ctxt->pv_codec_obj = NULL; + ps_app_ctxt->u4_num_cores = DEFAULT_NUM_CORES; + ps_app_ctxt->u4_pre_enc_me = 0; + ps_app_ctxt->u4_pre_enc_ipe = 0; + ps_app_ctxt->ac_ip_fname[0] = '\0'; + ps_app_ctxt->ac_op_fname[0] = '\0'; + ps_app_ctxt->ac_recon_fname[0] = '\0'; + ps_app_ctxt->ac_nalu_info_csv_fname[0] = '\0'; + ps_app_ctxt->ac_chksum_fname[0] = '\0'; + ps_app_ctxt->ac_mb_info_fname[0] = '\0'; + ps_app_ctxt->fp_ip = NULL; + ps_app_ctxt->fp_op = NULL; + ps_app_ctxt->fp_recon = NULL; + ps_app_ctxt->fp_nalu_info = NULL; + ps_app_ctxt->fp_chksum = NULL; + ps_app_ctxt->fp_psnr_ip = NULL; + ps_app_ctxt->fp_mb_info = NULL; + ps_app_ctxt->fp_pic_info = NULL; + ps_app_ctxt->u4_loopback = DEFAULT_LOOPBACK; + ps_app_ctxt->u4_max_frame_rate = DEFAULT_MAX_FRAMERATE; + ps_app_ctxt->u4_src_frame_rate = DEFAULT_SRC_FRAME_RATE; + ps_app_ctxt->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE; + ps_app_ctxt->u4_max_wd = DEFAULT_MAX_WD; + ps_app_ctxt->u4_max_ht = DEFAULT_MAX_HT; + ps_app_ctxt->u4_max_level = DEFAULT_MAX_LEVEL; + ps_app_ctxt->u4_strd = DEFAULT_STRIDE; + ps_app_ctxt->u4_wd = DEFAULT_WD; + ps_app_ctxt->u4_ht = DEFAULT_HT; + ps_app_ctxt->u4_psnr_enable = DEFAULT_PSNR_ENABLE; + ps_app_ctxt->u4_enc_speed = IVE_SLOWEST; + ps_app_ctxt->u4_me_speed = DEFAULT_ME_SPEED; + ps_app_ctxt->u4_enable_fast_sad = DEFAULT_ENABLE_FAST_SAD; + ps_app_ctxt->u4_enable_alt_ref = DEFAULT_ENABLE_ALT_REF; + ps_app_ctxt->u4_rc = DEFAULT_RC; + ps_app_ctxt->u4_num_bframes = DEFAULT_NUM_BFRAMES; + ps_app_ctxt->u4_air = DEFAULT_AIR; + ps_app_ctxt->u4_air_refresh_period = DEFAULT_AIR_REFRESH_PERIOD; + 
ps_app_ctxt->u4_srch_rng_x = DEFAULT_SRCH_RNG_X; + ps_app_ctxt->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y; + ps_app_ctxt->u4_i_interval = DEFAULT_I_INTERVAL; + ps_app_ctxt->u4_idr_interval = DEFAULT_IDR_INTERVAL; + ps_app_ctxt->u4_disable_deblock_level = DEFAULT_DISABLE_DEBLK_LEVEL; + ps_app_ctxt->u4_hpel = DEFAULT_HPEL; + ps_app_ctxt->u4_qpel = DEFAULT_QPEL; + ps_app_ctxt->u4_enable_intra_4x4 = DEFAULT_I4; + ps_app_ctxt->e_profile = DEFAULT_EPROFILE; + ps_app_ctxt->u4_slice_mode = DEFAULT_SLICE_MODE; + ps_app_ctxt->u4_slice_param = DEFAULT_SLICE_PARAM; + ps_app_ctxt->pv_input_thread_handle = NULL; + ps_app_ctxt->pv_output_thread_handle = NULL; + ps_app_ctxt->pv_recon_thread_handle = NULL; + ps_app_ctxt->adbl_psnr[0] = 0.0; + ps_app_ctxt->adbl_psnr[1] = 0.0; + ps_app_ctxt->adbl_psnr[2] = 0.0; + ps_app_ctxt->u4_psnr_cnt = 0; + ps_app_ctxt->pu1_psnr_buf = NULL; + ps_app_ctxt->u4_psnr_buf_size = 0; + ps_app_ctxt->u4_entropy_coding_mode = DEFAULT_ENTROPY_CODING_MODE; + ps_app_ctxt->u1_num_temporal_layers = DEFAULT_NUM_TEMPORAL_LAYERS; + ps_app_ctxt->u1_num_spatial_layers = DEFAULT_NUM_SPATIAL_LAYERS; + ps_app_ctxt->d_spatial_res_ratio = DEFAULT_SPATIAL_RES_RATIO; + ps_app_ctxt->u4_use_default_vui = 1; +} + +static void set_dimensions(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + isvce_ctl_set_dimensions_ip_t s_frame_dimensions_ip; + isvce_ctl_set_dimensions_op_t s_frame_dimensions_op; + + IV_STATUS_T status; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_DIMENSIONS}; + + s_frame_dimensions_ip.s_ive_ip.u4_ht = ps_app_ctxt->u4_ht; + s_frame_dimensions_ip.s_ive_ip.u4_wd = ps_app_ctxt->u4_wd; + + s_frame_dimensions_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_frame_dimensions_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_frame_dimensions_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_dimensions_ip_t); + s_frame_dimensions_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_dimensions_op_t); + + status = 
isvce_api_function(ps_app_ctxt->ps_enc, &s_frame_dimensions_ip, &s_frame_dimensions_op, + &s_api_cmds); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set frame dimensions = 0x%x\n", + s_frame_dimensions_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } +} + +static void get_enc_dimensions(app_ctxt_t *ps_app_ctxt) +{ + isvce_ctl_get_enc_dimensions_ip_t s_ip; + isvce_ctl_get_enc_dimensions_op_t s_op; + + IV_STATUS_T status; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_GET_ENC_FRAME_DIMENSIONS}; + + s_ip.u4_inp_frame_wd = ps_app_ctxt->u4_wd; + s_ip.u4_inp_frame_ht = ps_app_ctxt->u4_ht; + + status = isvce_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op, &s_api_cmds); + + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to get enc frame dimensions = 0x%x\n", + s_op.u4_error_code); + + codec_exit(ac_error); + } + + ps_app_ctxt->u4_enc_wd = s_op.u4_enc_frame_wd; + ps_app_ctxt->u4_enc_ht = s_op.u4_enc_frame_ht; +} + +void set_frame_rate(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high) +{ + isvce_ctl_set_frame_rate_ip_t s_frame_rate_ip; + isvce_ctl_set_frame_rate_op_t s_frame_rate_op; + + IV_STATUS_T status; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_FRAMERATE}; + + s_frame_rate_ip.s_ive_ip.u4_src_frame_rate = ps_app_ctxt->u4_src_frame_rate; + s_frame_rate_ip.s_ive_ip.u4_tgt_frame_rate = ps_app_ctxt->u4_tgt_frame_rate; + + s_frame_rate_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_frame_rate_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_frame_rate_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_frame_rate_ip_t); + s_frame_rate_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_frame_rate_op_t); + + status = + isvce_api_function(ps_app_ctxt->ps_enc, &s_frame_rate_ip, &s_frame_rate_op, &s_api_cmds); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; 
+ snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set frame rate = 0x%x\n", + s_frame_rate_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } +} + +void set_ipe_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high) +{ + isvce_ctl_set_ipe_params_ip_t s_ipe_params_ip; + isvce_ctl_set_ipe_params_op_t s_ipe_params_op; + + IV_STATUS_T status; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_IPE_PARAMS}; + + s_ipe_params_ip.s_ive_ip.u4_enable_intra_4x4 = ps_app_ctxt->u4_enable_intra_4x4; + s_ipe_params_ip.s_ive_ip.u4_enc_speed_preset = ps_app_ctxt->u4_enc_speed; + + s_ipe_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_ipe_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_ipe_params_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_ipe_params_ip_t); + s_ipe_params_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_ipe_params_op_t); + + status = + isvce_api_function(ps_app_ctxt->ps_enc, &s_ipe_params_ip, &s_ipe_params_op, &s_api_cmds); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set ipe params = 0x%x\n", + s_ipe_params_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } +} + +void set_bit_rate(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high) +{ + isvce_ctl_set_bitrate_ip_t s_bitrate_ip; + isvce_ctl_set_bitrate_op_t s_bitrate_op; + + IV_STATUS_T status; + WORD8 i; + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_BITRATE}; + + s_bitrate_ip.pu4_target_bitrate = + isvca_aligned_malloc(16, sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers); + + for(i = 0; i < ps_app_ctxt->u1_num_spatial_layers; i++) + { + s_bitrate_ip.pu4_target_bitrate[i] = ps_app_ctxt->pu4_bitrate[i]; + } + + s_bitrate_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_bitrate_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_bitrate_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_bitrate_ip_t); + 
s_bitrate_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_bitrate_op_t); + + status = isvce_api_function(ps_app_ctxt->ps_enc, &s_bitrate_ip, &s_bitrate_op, &s_api_cmds); + + isvca_aligned_free(s_bitrate_ip.pu4_target_bitrate); + + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set bit rate = 0x%x\n", + s_bitrate_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } +} + +void set_frame_type(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high, + IV_PICTURE_CODING_TYPE_T e_frame_type) +{ + isvce_ctl_set_frame_type_ip_t s_frame_type_ip; + isvce_ctl_set_frame_type_op_t s_frame_type_op; + + IV_STATUS_T status; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_FRAMETYPE}; + + s_frame_type_ip.s_ive_ip.e_frame_type = e_frame_type; + + s_frame_type_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_frame_type_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_frame_type_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_frame_type_ip_t); + s_frame_type_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_frame_type_op_t); + + status = + isvce_api_function(ps_app_ctxt->ps_enc, &s_frame_type_ip, &s_frame_type_op, &s_api_cmds); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set frame type = 0x%x\n", + s_frame_type_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } +} + +void set_qp(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high) +{ + isvce_ctl_set_qp_ip_t s_qp_ip; + isvce_ctl_set_qp_op_t s_qp_op; + + IV_STATUS_T status; + WORD8 i; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_QP}; + + s_qp_ip.pu4_i_qp = + isvca_aligned_malloc(16, sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers); + s_qp_ip.pu4_i_qp_max = + isvca_aligned_malloc(16, sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers); + s_qp_ip.pu4_i_qp_min = + isvca_aligned_malloc(16, 
sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers); + + s_qp_ip.pu4_p_qp = + isvca_aligned_malloc(16, sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers); + s_qp_ip.pu4_p_qp_max = + isvca_aligned_malloc(16, sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers); + s_qp_ip.pu4_p_qp_min = + isvca_aligned_malloc(16, sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers); + + s_qp_ip.pu4_b_qp = + isvca_aligned_malloc(16, sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers); + s_qp_ip.pu4_b_qp_max = + isvca_aligned_malloc(16, sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers); + s_qp_ip.pu4_b_qp_min = + isvca_aligned_malloc(16, sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers); + + for(i = 0; i < ps_app_ctxt->u1_num_spatial_layers; i++) + { + s_qp_ip.pu4_i_qp[i] = ps_app_ctxt->pu4_i_qp[i]; + s_qp_ip.pu4_i_qp_max[i] = ps_app_ctxt->pu4_i_qp_max[i]; + s_qp_ip.pu4_i_qp_min[i] = ps_app_ctxt->pu4_i_qp_min[i]; + + s_qp_ip.pu4_p_qp[i] = ps_app_ctxt->pu4_p_qp[i]; + s_qp_ip.pu4_p_qp_max[i] = ps_app_ctxt->pu4_p_qp_max[i]; + s_qp_ip.pu4_p_qp_min[i] = ps_app_ctxt->pu4_p_qp_min[i]; + + s_qp_ip.pu4_b_qp[i] = ps_app_ctxt->pu4_b_qp[i]; + s_qp_ip.pu4_b_qp_max[i] = ps_app_ctxt->pu4_b_qp_max[i]; + s_qp_ip.pu4_b_qp_min[i] = ps_app_ctxt->pu4_b_qp_min[i]; + } + + s_qp_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_qp_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_qp_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_qp_ip_t); + s_qp_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_qp_op_t); + + status = isvce_api_function(ps_app_ctxt->ps_enc, &s_qp_ip, &s_qp_op, &s_api_cmds); + + isvca_aligned_free(s_qp_ip.pu4_i_qp); + isvca_aligned_free(s_qp_ip.pu4_i_qp_min); + isvca_aligned_free(s_qp_ip.pu4_i_qp_max); + + isvca_aligned_free(s_qp_ip.pu4_p_qp); + isvca_aligned_free(s_qp_ip.pu4_p_qp_min); + isvca_aligned_free(s_qp_ip.pu4_p_qp_max); + + isvca_aligned_free(s_qp_ip.pu4_b_qp); + isvca_aligned_free(s_qp_ip.pu4_b_qp_min); + isvca_aligned_free(s_qp_ip.pu4_b_qp_max); + + if(status != 
IV_SUCCESS)
+    {
+        CHAR ac_error[STRLENGTH];
+        snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set qp 0x%x\n",
+                 s_qp_op.s_ive_op.u4_error_code);
+        codec_exit(ac_error);
+    }
+}
+
+/* Issues ISVCE_CMD_CTL_SET_ENC_MODE with the requested encode mode and the   */
+/* given timestamp; calls codec_exit() with the reported error code on failure */
+void set_enc_mode(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high,
+                  IVE_ENC_MODE_T e_enc_mode)
+{
+    IV_STATUS_T status;
+
+    isvce_ctl_set_enc_mode_ip_t s_enc_mode_ip;
+    isvce_ctl_set_enc_mode_op_t s_enc_mode_op;
+
+    isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_ENC_MODE};
+
+    s_enc_mode_ip.s_ive_ip.e_enc_mode = e_enc_mode;
+
+    s_enc_mode_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+    s_enc_mode_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+
+    s_enc_mode_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_enc_mode_ip_t);
+    s_enc_mode_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_enc_mode_op_t);
+
+    status = isvce_api_function(ps_app_ctxt->ps_enc, &s_enc_mode_ip, &s_enc_mode_op, &s_api_cmds);
+    if(status != IV_SUCCESS)
+    {
+        CHAR ac_error[STRLENGTH];
+        snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set in header encode mode = 0x%x\n",
+                 s_enc_mode_op.s_ive_op.u4_error_code);
+        codec_exit(ac_error);
+    }
+}
+
+/* Issues ISVCE_CMD_CTL_SET_VBV_PARAMS with one VBV buffer delay per spatial  */
+/* layer. The delays are copied from the application context into a temporary */
+/* array that is released right after the call. Allocation failure and a      */
+/* failing control call are both reported through codec_exit().               */
+void set_vbv_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high)
+{
+    isvce_ctl_set_vbv_params_ip_t s_vbv_ip;
+    isvce_ctl_set_vbv_params_op_t s_vbv_op;
+
+    IV_STATUS_T status;
+    int i;
+
+    isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_VBV_PARAMS};
+
+    s_vbv_ip.pu4_vbv_buffer_delay =
+        isvca_aligned_malloc(16, sizeof(UWORD32) * ps_app_ctxt->u1_num_spatial_layers);
+
+    /* NOTE(review): assumes isvca_aligned_malloc() returns NULL on failure - confirm */
+    if(NULL == s_vbv_ip.pu4_vbv_buffer_delay)
+    {
+        CHAR ac_error[STRLENGTH];
+        snprintf(ac_error, sizeof(ac_error) - 1, "Unable to allocate memory for VBV params\n");
+        codec_exit(ac_error);
+        /* Never write through a NULL array, even if codec_exit() returns */
+        return;
+    }
+
+    for(i = 0; i < ps_app_ctxt->u1_num_spatial_layers; i++)
+    {
+        s_vbv_ip.pu4_vbv_buffer_delay[i] = ps_app_ctxt->pu4_vbv_buffer_delay[i];
+    }
+
+    s_vbv_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+    s_vbv_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+
+    s_vbv_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_vbv_params_ip_t);
+    s_vbv_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_vbv_params_op_t);
+
+    status = isvce_api_function(ps_app_ctxt->ps_enc, &s_vbv_ip, &s_vbv_op, &s_api_cmds);
+
+    /* The array is only needed for the duration of the control call */
+    isvca_aligned_free(s_vbv_ip.pu4_vbv_buffer_delay);
+
+    if(status != IV_SUCCESS)
+    {
+        CHAR ac_error[STRLENGTH];
+        snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set VBV params = 0x%x\n",
+                 s_vbv_op.s_ive_op.u4_error_code);
+        codec_exit(ac_error);
+    }
+}
+
+/* Issues ISVCE_CMD_CTL_SET_AIR_PARAMS with the AIR mode and refresh period   */
+/* held in the application context; calls codec_exit() with the reported      */
+/* error code on failure                                                      */
+void set_air_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high)
+{
+    isvce_ctl_set_air_params_ip_t s_air_ip;
+    isvce_ctl_set_air_params_op_t s_air_op;
+
+    IV_STATUS_T status;
+
+    isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_AIR_PARAMS};
+
+    s_air_ip.s_ive_ip.e_air_mode = ps_app_ctxt->u4_air;
+    s_air_ip.s_ive_ip.u4_air_refresh_period = ps_app_ctxt->u4_air_refresh_period;
+
+    s_air_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+    s_air_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+
+    s_air_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_air_params_ip_t);
+    s_air_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_air_params_op_t);
+
+    status = isvce_api_function(ps_app_ctxt->ps_enc, &s_air_ip, &s_air_op, &s_api_cmds);
+
+    if(status != IV_SUCCESS)
+    {
+        CHAR ac_error[STRLENGTH];
+        snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set air params = 0x%x\n",
+                 s_air_op.s_ive_op.u4_error_code);
+        codec_exit(ac_error);
+    }
+}
+
+void set_me_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high)
+{
+    IV_STATUS_T status;
+
+    isvce_ctl_set_me_params_ip_t s_me_params_ip;
+    isvce_ctl_set_me_params_op_t s_me_params_op;
+
+    isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_ME_PARAMS};
+
+    s_me_params_ip.s_ive_ip.u4_enable_fast_sad = ps_app_ctxt->u4_enable_fast_sad;
+    s_me_params_ip.s_ive_ip.u4_enable_alt_ref = ps_app_ctxt->u4_enable_alt_ref;
+
+    s_me_params_ip.s_ive_ip.u4_enable_hpel = ps_app_ctxt->u4_hpel;
+    s_me_params_ip.s_ive_ip.u4_enable_qpel = ps_app_ctxt->u4_qpel;
+    s_me_params_ip.s_ive_ip.u4_me_speed_preset =
ps_app_ctxt->u4_me_speed;
+    s_me_params_ip.s_ive_ip.u4_srch_rng_x = ps_app_ctxt->u4_srch_rng_x;
+    s_me_params_ip.s_ive_ip.u4_srch_rng_y = ps_app_ctxt->u4_srch_rng_y;
+
+    s_me_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+    s_me_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+
+    s_me_params_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_me_params_ip_t);
+    s_me_params_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_me_params_op_t);
+
+    status = isvce_api_function(ps_app_ctxt->ps_enc, &s_me_params_ip, &s_me_params_op, &s_api_cmds);
+
+    if(status != IV_SUCCESS)
+    {
+        CHAR ac_error[STRLENGTH];
+        snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set me params = 0x%x\n",
+                 s_me_params_op.s_ive_op.u4_error_code);
+        codec_exit(ac_error);
+    }
+}
+
+/* Issues ISVCE_CMD_CTL_SET_GOP_PARAMS with the I-frame and IDR-frame         */
+/* intervals held in the application context; calls codec_exit() with the     */
+/* reported error code on failure                                             */
+void set_gop_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high)
+{
+    IV_STATUS_T status;
+
+    isvce_ctl_set_gop_params_ip_t s_gop_params_ip;
+    isvce_ctl_set_gop_params_op_t s_gop_params_op;
+
+    isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_GOP_PARAMS};
+
+    s_gop_params_ip.s_ive_ip.u4_i_frm_interval = ps_app_ctxt->u4_i_interval;
+    s_gop_params_ip.s_ive_ip.u4_idr_frm_interval = ps_app_ctxt->u4_idr_interval;
+
+    s_gop_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+    s_gop_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+
+    s_gop_params_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_gop_params_ip_t);
+    s_gop_params_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_gop_params_op_t);
+
+    status =
+        isvce_api_function(ps_app_ctxt->ps_enc, &s_gop_params_ip, &s_gop_params_op, &s_api_cmds);
+    if(status != IV_SUCCESS)
+    {
+        CHAR ac_error[STRLENGTH];
+        /* Name the GOP call here so a failure is not mistaken for a  */
+        /* motion-estimation failure                                   */
+        snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set gop params = 0x%x\n",
+                 s_gop_params_op.s_ive_op.u4_error_code);
+        codec_exit(ac_error);
+    }
+}
+
+void set_profile_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low,
+                        UWORD32 u4_timestamp_high)
+{
+    IV_STATUS_T status;
+
+
isvce_ctl_set_profile_params_ip_t s_profile_params_ip; + isvce_ctl_set_profile_params_op_t s_profile_params_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_PROFILE_PARAMS}; + + s_profile_params_ip.s_ive_ip.e_profile = ps_app_ctxt->e_profile; + + s_profile_params_ip.s_ive_ip.u4_entropy_coding_mode = ps_app_ctxt->u4_entropy_coding_mode; + + s_profile_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_profile_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_profile_params_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_profile_params_ip_t); + s_profile_params_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_profile_params_op_t); + + status = isvce_api_function(ps_app_ctxt->ps_enc, &s_profile_params_ip, &s_profile_params_op, + &s_api_cmds); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set profile params = 0x%x\n", + s_profile_params_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } +} + +void set_deblock_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + IV_STATUS_T status; + + isvce_ctl_set_deblock_params_ip_t s_deblock_params_ip; + isvce_ctl_set_deblock_params_op_t s_deblock_params_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_DEBLOCK_PARAMS}; + + s_deblock_params_ip.s_ive_ip.u4_disable_deblock_level = ps_app_ctxt->u4_disable_deblock_level; + + s_deblock_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_deblock_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_deblock_params_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_deblock_params_ip_t); + s_deblock_params_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_deblock_params_op_t); + + status = isvce_api_function(ps_app_ctxt->ps_enc, &s_deblock_params_ip, &s_deblock_params_op, + &s_api_cmds); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to enable/disable 
deblock params = 0x%x\n", + s_deblock_params_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } +} + +void set_vui_params(app_ctxt_t *ps_app_ctxt) +{ + IV_STATUS_T status; + + isvce_vui_ip_t s_vui_params_ip; + isvce_vui_op_t s_vui_params_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_VUI_PARAMS}; + + s_vui_params_ip.u1_aspect_ratio_info_present_flag = 0; + s_vui_params_ip.u1_aspect_ratio_idc = 0; + s_vui_params_ip.u2_sar_width = 0; + s_vui_params_ip.u2_sar_height = 0; + s_vui_params_ip.u1_overscan_info_present_flag = 0; + s_vui_params_ip.u1_overscan_appropriate_flag = 0; + s_vui_params_ip.u1_video_signal_type_present_flag = 1; + s_vui_params_ip.u1_video_format = 0; + s_vui_params_ip.u1_video_full_range_flag = 0; + s_vui_params_ip.u1_colour_description_present_flag = 0; + s_vui_params_ip.u1_colour_primaries = 0; + s_vui_params_ip.u1_transfer_characteristics = 0; + s_vui_params_ip.u1_matrix_coefficients = 0; + s_vui_params_ip.u1_chroma_loc_info_present_flag = 0; + s_vui_params_ip.u1_chroma_sample_loc_type_top_field = 0; + s_vui_params_ip.u1_chroma_sample_loc_type_bottom_field = 0; + s_vui_params_ip.u1_vui_timing_info_present_flag = 0; + s_vui_params_ip.u4_vui_num_units_in_tick = 0; + s_vui_params_ip.u4_vui_time_scale = 0; + s_vui_params_ip.u1_fixed_frame_rate_flag = 0; + s_vui_params_ip.u1_nal_hrd_parameters_present_flag = 0; + s_vui_params_ip.u1_vcl_hrd_parameters_present_flag = 0; + s_vui_params_ip.u1_low_delay_hrd_flag = 0; + s_vui_params_ip.u1_pic_struct_present_flag = 0; + s_vui_params_ip.u1_bitstream_restriction_flag = 0; + s_vui_params_ip.u1_motion_vectors_over_pic_boundaries_flag = 0; + s_vui_params_ip.u1_max_bytes_per_pic_denom = 0; + s_vui_params_ip.u1_max_bits_per_mb_denom = 0; + s_vui_params_ip.u1_log2_max_mv_length_horizontal = 0; + s_vui_params_ip.u1_log2_max_mv_length_vertical = 0; + s_vui_params_ip.u1_num_reorder_frames = 0; + s_vui_params_ip.u1_max_dec_frame_buffering = 0; + + s_vui_params_ip.u4_size = 
sizeof(isvce_vui_ip_t); + s_vui_params_op.u4_size = sizeof(isvce_vui_op_t); + + status = + isvce_api_function(ps_app_ctxt->ps_enc, &s_vui_params_ip, &s_vui_params_op, &s_api_cmds); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set vui params = 0x%x\n", + s_vui_params_op.u4_error_code); + codec_exit(ac_error); + } +} + +void set_sei_mdcv_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + WORD32 i4_count; + IV_STATUS_T status; + + isvce_ctl_set_sei_mdcv_params_ip_t s_sei_mdcv_params_ip; + isvce_ctl_set_sei_mdcv_params_op_t s_sei_mdcv_params_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_SEI_MDCV_PARAMS}; + + s_sei_mdcv_params_ip.u1_sei_mdcv_params_present_flag = + (UWORD8) ps_app_ctxt->u4_sei_mdcv_params_present_flag; + + for(i4_count = 0; i4_count < NUM_SEI_MDCV_PRIMARIES; i4_count++) + { + s_sei_mdcv_params_ip.au2_display_primaries_x[i4_count] = + (UWORD16) ps_app_ctxt->au4_display_primaries_x[i4_count]; + s_sei_mdcv_params_ip.au2_display_primaries_y[i4_count] = + (UWORD16) ps_app_ctxt->au4_display_primaries_y[i4_count]; + } + + s_sei_mdcv_params_ip.u2_white_point_x = (UWORD16) ps_app_ctxt->u4_white_point_x; + s_sei_mdcv_params_ip.u2_white_point_y = (UWORD16) ps_app_ctxt->u4_white_point_y; + s_sei_mdcv_params_ip.u4_max_display_mastering_luminance = + ps_app_ctxt->u4_max_display_mastering_luminance; + s_sei_mdcv_params_ip.u4_min_display_mastering_luminance = + ps_app_ctxt->u4_min_display_mastering_luminance; + + s_sei_mdcv_params_ip.u4_timestamp_high = u4_timestamp_high; + s_sei_mdcv_params_ip.u4_timestamp_low = u4_timestamp_low; + + s_sei_mdcv_params_ip.u4_size = sizeof(isvce_ctl_set_sei_mdcv_params_ip_t); + s_sei_mdcv_params_op.u4_size = sizeof(isvce_ctl_set_sei_mdcv_params_op_t); + + if((ps_app_ctxt->s_sei_mdcv_params.au2_display_primaries_x[0] != + s_sei_mdcv_params_ip.au2_display_primaries_x[0]) || + 
(ps_app_ctxt->s_sei_mdcv_params.au2_display_primaries_x[1] != + s_sei_mdcv_params_ip.au2_display_primaries_x[1]) || + (ps_app_ctxt->s_sei_mdcv_params.au2_display_primaries_x[2] != + s_sei_mdcv_params_ip.au2_display_primaries_x[2]) || + (ps_app_ctxt->s_sei_mdcv_params.au2_display_primaries_y[0] != + s_sei_mdcv_params_ip.au2_display_primaries_y[0]) || + (ps_app_ctxt->s_sei_mdcv_params.au2_display_primaries_y[1] != + s_sei_mdcv_params_ip.au2_display_primaries_y[1]) || + (ps_app_ctxt->s_sei_mdcv_params.au2_display_primaries_y[2] != + s_sei_mdcv_params_ip.au2_display_primaries_y[2]) || + (ps_app_ctxt->s_sei_mdcv_params.u2_white_point_x != s_sei_mdcv_params_ip.u2_white_point_x) || + (ps_app_ctxt->s_sei_mdcv_params.u2_white_point_y != s_sei_mdcv_params_ip.u2_white_point_y) || + (ps_app_ctxt->s_sei_mdcv_params.u4_max_display_mastering_luminance != + s_sei_mdcv_params_ip.u4_max_display_mastering_luminance) || + (ps_app_ctxt->s_sei_mdcv_params.u4_min_display_mastering_luminance != + s_sei_mdcv_params_ip.u4_min_display_mastering_luminance)) + { + /* At least one field differs from the cached copy: send the new values + * to the encoder, then refresh the cache (even if the call failed) */ + status = isvce_api_function(ps_app_ctxt->ps_enc, &s_sei_mdcv_params_ip, + &s_sei_mdcv_params_op, &s_api_cmds); + if(status != IV_SUCCESS) + { + printf("Unable to set sei mdcv params = 0x%x\n", s_sei_mdcv_params_op.u4_error_code); + } + ps_app_ctxt->s_sei_mdcv_params = s_sei_mdcv_params_ip; + } +} + +void set_sei_cll_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + IV_STATUS_T status; + + isvce_ctl_set_sei_cll_params_ip_t s_sei_cll_params_ip; + isvce_ctl_set_sei_cll_params_op_t s_sei_cll_params_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_SEI_CLL_PARAMS}; + + s_sei_cll_params_ip.u1_sei_cll_params_present_flag = + (UWORD8) ps_app_ctxt->u4_sei_cll_params_present_flag; + + s_sei_cll_params_ip.u2_max_content_light_level = + (UWORD16) ps_app_ctxt->u4_max_content_light_level; + s_sei_cll_params_ip.u2_max_pic_average_light_level = + (UWORD16) 
ps_app_ctxt->u4_max_pic_average_light_level; + + s_sei_cll_params_ip.u4_timestamp_high = u4_timestamp_high; + s_sei_cll_params_ip.u4_timestamp_low = u4_timestamp_low; + + s_sei_cll_params_ip.u4_size = sizeof(isvce_ctl_set_sei_cll_params_ip_t); + s_sei_cll_params_op.u4_size = sizeof(isvce_ctl_set_sei_cll_params_op_t); + + if((ps_app_ctxt->s_sei_cll_params.u2_max_content_light_level != + s_sei_cll_params_ip.u2_max_content_light_level) || + (ps_app_ctxt->s_sei_cll_params.u2_max_pic_average_light_level != + s_sei_cll_params_ip.u2_max_pic_average_light_level)) + { + status = isvce_api_function(ps_app_ctxt->ps_enc, &s_sei_cll_params_ip, &s_sei_cll_params_op, + &s_api_cmds); + if(status != IV_SUCCESS) + { + printf("Unable to set sei cll params = 0x%x\n", s_sei_cll_params_op.u4_error_code); + } + ps_app_ctxt->s_sei_cll_params = s_sei_cll_params_ip; + } +} + +void set_sei_ave_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + IV_STATUS_T status; + + isvce_ctl_set_sei_ave_params_ip_t s_sei_ave_params_ip; + isvce_ctl_set_sei_ave_params_op_t s_sei_ave_params_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_SEI_AVE_PARAMS}; + + s_sei_ave_params_ip.u1_sei_ave_params_present_flag = + (UWORD8) ps_app_ctxt->u4_sei_ave_params_present_flag; + + s_sei_ave_params_ip.u4_ambient_illuminance = ps_app_ctxt->u4_ambient_illuminance; + s_sei_ave_params_ip.u2_ambient_light_x = (UWORD16) ps_app_ctxt->u4_ambient_light_x; + s_sei_ave_params_ip.u2_ambient_light_y = (UWORD16) ps_app_ctxt->u4_ambient_light_y; + + s_sei_ave_params_ip.u4_timestamp_high = u4_timestamp_high; + s_sei_ave_params_ip.u4_timestamp_low = u4_timestamp_low; + + s_sei_ave_params_ip.u4_size = sizeof(isvce_ctl_set_sei_ave_params_ip_t); + s_sei_ave_params_op.u4_size = sizeof(isvce_ctl_set_sei_ave_params_op_t); + + if((ps_app_ctxt->s_sei_ave_params.u4_ambient_illuminance != + s_sei_ave_params_ip.u4_ambient_illuminance) || + 
(ps_app_ctxt->s_sei_ave_params.u2_ambient_light_x != + s_sei_ave_params_ip.u2_ambient_light_x) || + (ps_app_ctxt->s_sei_ave_params.u2_ambient_light_y != s_sei_ave_params_ip.u2_ambient_light_y)) + { + status = isvce_api_function(ps_app_ctxt->ps_enc, &s_sei_ave_params_ip, &s_sei_ave_params_op, + &s_api_cmds); + if(status != IV_SUCCESS) + { + printf("Unable to set sei ave params = 0x%x\n", s_sei_ave_params_op.u4_error_code); + } + ps_app_ctxt->s_sei_ave_params = s_sei_ave_params_ip; + } +} + +void set_sei_ccv_params(app_ctxt_t *ps_app_ctxt, UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + WORD32 i4_count; + IV_STATUS_T status; + + isvce_ctl_set_sei_ccv_params_ip_t s_sei_ccv_params_ip; + isvce_ctl_set_sei_ccv_params_op_t s_sei_ccv_params_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_SEI_CCV_PARAMS}; + + s_sei_ccv_params_ip.u1_sei_ccv_params_present_flag = + (UWORD8) ps_app_ctxt->u4_sei_ccv_params_present_flag; + + s_sei_ccv_params_ip.u1_ccv_cancel_flag = (UWORD8) ps_app_ctxt->u4_ccv_cancel_flag; + s_sei_ccv_params_ip.u1_ccv_persistence_flag = (UWORD8) ps_app_ctxt->u4_ccv_persistence_flag; + s_sei_ccv_params_ip.u1_ccv_primaries_present_flag = + (UWORD8) ps_app_ctxt->u4_ccv_primaries_present_flag; + s_sei_ccv_params_ip.u1_ccv_min_luminance_value_present_flag = + (UWORD8) ps_app_ctxt->u4_ccv_min_luminance_value_present_flag; + s_sei_ccv_params_ip.u1_ccv_max_luminance_value_present_flag = + (UWORD8) ps_app_ctxt->u4_ccv_max_luminance_value_present_flag; + s_sei_ccv_params_ip.u1_ccv_avg_luminance_value_present_flag = + (UWORD8) ps_app_ctxt->u4_ccv_avg_luminance_value_present_flag; + s_sei_ccv_params_ip.u1_ccv_reserved_zero_2bits = + (UWORD8) ps_app_ctxt->u4_ccv_reserved_zero_2bits; + + for(i4_count = 0; i4_count < NUM_SEI_CCV_PRIMARIES; i4_count++) + { + s_sei_ccv_params_ip.ai4_ccv_primaries_x[i4_count] = + ps_app_ctxt->ai4_ccv_primaries_x[i4_count]; + s_sei_ccv_params_ip.ai4_ccv_primaries_y[i4_count] = + 
ps_app_ctxt->ai4_ccv_primaries_y[i4_count]; + } + + s_sei_ccv_params_ip.u4_ccv_min_luminance_value = ps_app_ctxt->u4_ccv_min_luminance_value; + s_sei_ccv_params_ip.u4_ccv_max_luminance_value = ps_app_ctxt->u4_ccv_max_luminance_value; + s_sei_ccv_params_ip.u4_ccv_avg_luminance_value = ps_app_ctxt->u4_ccv_avg_luminance_value; + + s_sei_ccv_params_ip.u4_timestamp_high = u4_timestamp_high; + s_sei_ccv_params_ip.u4_timestamp_low = u4_timestamp_low; + + s_sei_ccv_params_ip.u4_size = sizeof(isvce_ctl_set_sei_ccv_params_ip_t); + s_sei_ccv_params_op.u4_size = sizeof(isvce_ctl_set_sei_ccv_params_op_t); + + status = isvce_api_function(ps_app_ctxt->ps_enc, &s_sei_ccv_params_ip, &s_sei_ccv_params_op, + &s_api_cmds); + if(status != IV_SUCCESS) + { + printf("Unable to set sei ccv params = 0x%x\n", s_sei_ccv_params_op.u4_error_code); + } +} + +static void allocate_nalu_info_bufs(app_ctxt_t *ps_app_ctxt) +{ + UWORD32 i; + + UWORD32 u4_num_bufs = + sizeof(ps_app_ctxt->as_nalu_info_bufs) / sizeof(ps_app_ctxt->as_nalu_info_bufs[0]); + + for(i = 0; i < u4_num_bufs; i++) + { + ps_app_ctxt->as_nalu_info_bufs[i].u4_buf_size = + ps_app_ctxt->s_get_buf_info_op.u4_min_nalu_info_buf_size; + ps_app_ctxt->as_nalu_info_bufs[i].b_is_free = true; + + ps_app_ctxt->as_nalu_info_bufs[i].pu1_buf = + (UWORD8 *) isvca_aligned_malloc(16, ps_app_ctxt->as_nalu_info_bufs[i].u4_buf_size); + } +} + +static void free_nalu_info_bufs(app_ctxt_t *ps_app_ctxt) +{ + UWORD32 i; + + UWORD32 u4_num_bufs = + sizeof(ps_app_ctxt->as_nalu_info_bufs) / sizeof(ps_app_ctxt->as_nalu_info_bufs[0]); + + for(i = 0; i < u4_num_bufs; i++) + { + isvca_aligned_free(ps_app_ctxt->as_nalu_info_bufs[i].pu1_buf); + ps_app_ctxt->as_nalu_info_bufs[i].b_is_free = false; + } +} + +#define PEAK_WINDOW_SIZE 8 + +void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) +{ + isvce_video_encode_ip_t s_video_encode_ip; + isvce_video_encode_op_t s_video_encode_op; + + ive_video_encode_ip_t *ps_video_encode_ip = 
&s_video_encode_ip.s_ive_ip; + ive_video_encode_op_t *ps_video_encode_op = &s_video_encode_op.s_ive_op; + + iv_raw_buf_t *ps_inp_raw_buf = &ps_video_encode_ip->s_inp_buf; + + IV_STATUS_T status = IV_SUCCESS; + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_ENCODE, ISVCE_CMD_CT_NA}; + + WORD32 i, is_last = 0, num_bytes = 0; + UWORD32 u4_total_time = 0; + UWORD8 *pu1_buf = NULL; + UWORD32 u4_timestamp_low, u4_timestamp_high; + void *pv_mb_info = NULL, *pv_pic_info = NULL; + WORD32 *pi4_nalu_info_buf_ids = isvca_aligned_malloc( + 16, ps_app_ctxt->u1_num_spatial_layers * sizeof(pi4_nalu_info_buf_ids[0])); + +#ifdef PROFILE_ENABLE + TIMER curtime; + WORD32 peak_window[PEAK_WINDOW_SIZE] = {0}; + WORD32 peak_window_idx = 0; +#endif + WORD32 peak_avg_max = 0, timetaken = 0; + iv_raw_buf_t s_inp_buf, s_recon_buf; + CHAR ac_error[2 * STRLENGTH]; + + u4_timestamp_low = 0; + u4_timestamp_high = 0; + + /*************************************************************************/ + /* Allocate I/O Buffers */ + /*************************************************************************/ + allocate_input(ps_app_ctxt); + allocate_output(ps_app_ctxt); + allocate_recon(ps_app_ctxt); + allocate_nalu_info_bufs(ps_app_ctxt); + + s_video_encode_ip.ps_nalu_info_buf = isvca_aligned_malloc( + 16, sizeof(isvce_nalu_info_buf_t) * ps_app_ctxt->u1_num_spatial_layers); + s_video_encode_op.ps_nalu_info_buf = isvca_aligned_malloc( + 16, sizeof(isvce_nalu_info_buf_t) * ps_app_ctxt->u1_num_spatial_layers); + + /* init psnr */ + init_psnr(ps_app_ctxt); + + /* open file pointers */ + ps_app_ctxt->fp_ip = fopen(ps_app_ctxt->ac_ip_fname, "rb"); + if(NULL == ps_app_ctxt->fp_ip) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to open input file for reading: %s", + ps_app_ctxt->ac_ip_fname); + invalid_argument_exit(ac_error); + } + + ps_app_ctxt->fp_op = fopen(ps_app_ctxt->ac_op_fname, "wb"); + if(NULL == ps_app_ctxt->fp_op) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to open output file for 
writing: %s", + ps_app_ctxt->ac_op_fname); + invalid_argument_exit(ac_error); + } + + if(1 == ps_app_ctxt->u4_recon_enable) + { + ps_app_ctxt->fp_recon = fopen(ps_app_ctxt->ac_recon_fname, "wb"); + if(NULL == ps_app_ctxt->fp_recon) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to open recon file for writing: %s", + ps_app_ctxt->ac_recon_fname); + invalid_argument_exit(ac_error); + } + } + + if(1 == ps_app_ctxt->u4_nalu_info_export_enable) + { + ps_app_ctxt->fp_nalu_info = fopen(ps_app_ctxt->ac_nalu_info_csv_fname, "w"); + + if(NULL == ps_app_ctxt->fp_nalu_info) + { + snprintf(ac_error, sizeof(ac_error) - 1, + "Unable to open NALU Info CSV file for writing: %s", + ps_app_ctxt->ac_nalu_info_csv_fname); + invalid_argument_exit(ac_error); + } + + fprintf(ps_app_ctxt->fp_nalu_info, + "type,length,SId,TID,isIDR,isFirstSliceInLayer,isLastSliceInLayer\n"); + } + + if(1 == ps_app_ctxt->u4_chksum_enable) + { + ps_app_ctxt->fp_chksum = fopen(ps_app_ctxt->ac_chksum_fname, "wb"); + if(NULL == ps_app_ctxt->fp_chksum) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to open checksum file for writing: %s", + ps_app_ctxt->ac_chksum_fname); + invalid_argument_exit(ac_error); + } + } + + /* If PSNR is enabled, open input file again and hold a different file pointer + * This makes it easy to compute PSNR without adding dependency between input + * and recon threads + */ + if(1 == ps_app_ctxt->u4_psnr_enable) + { + ps_app_ctxt->fp_psnr_ip = fopen(ps_app_ctxt->ac_ip_fname, "rb"); + if(NULL == ps_app_ctxt->fp_psnr_ip) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to open input file for reading: %s", + ps_app_ctxt->ac_ip_fname); + invalid_argument_exit(ac_error); + } + } + + if(0 != ps_app_ctxt->u4_mb_info_type) + { + ps_app_ctxt->fp_mb_info = fopen(ps_app_ctxt->ac_mb_info_fname, "rb"); + if(NULL == ps_app_ctxt->fp_mb_info) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to open MB info file for reading: %s", + ps_app_ctxt->ac_mb_info_fname); + 
invalid_argument_exit(ac_error); + } + } + if(ps_app_ctxt->u4_pic_info_type) + { + ps_app_ctxt->fp_pic_info = fopen(ps_app_ctxt->ac_pic_info_fname, "rb"); + if(NULL == ps_app_ctxt->fp_pic_info) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to open Pic info file for reading: %s", + ps_app_ctxt->ac_pic_info_fname); + invalid_argument_exit(ac_error); + } + } + + GETTIME(&ps_app_ctxt->enc_start_time); + ps_app_ctxt->enc_last_time = ps_app_ctxt->enc_start_time; + + while(1) + { + WORD32 i4_count; + + /* Default sei params values*/ + ps_app_ctxt->u4_sei_mdcv_params_present_flag = 0; + if(1 == ps_app_ctxt->u4_sei_mdcv_params_present_flag) + { + for(i4_count = 0; i4_count < NUM_SEI_MDCV_PRIMARIES; i4_count++) + { + ps_app_ctxt->au4_display_primaries_x[i4_count] = 0; + ps_app_ctxt->au4_display_primaries_y[i4_count] = 0; + } + ps_app_ctxt->u4_white_point_x = 0; + ps_app_ctxt->u4_white_point_y = 0; + ps_app_ctxt->u4_max_display_mastering_luminance = + DEFAULT_MAX_DISPLAY_MASTERING_LUMINANCE; + ps_app_ctxt->u4_min_display_mastering_luminance = + DEFAULT_MIN_DISPLAY_MASTERING_LUMINANCE; + set_sei_mdcv_params(ps_app_ctxt, u4_timestamp_low, u4_timestamp_high); + } + + ps_app_ctxt->u4_sei_cll_params_present_flag = 0; + if(1 == ps_app_ctxt->u4_sei_cll_params_present_flag) + { + ps_app_ctxt->u4_max_content_light_level = 0; + ps_app_ctxt->u4_max_pic_average_light_level = 0; + set_sei_cll_params(ps_app_ctxt, u4_timestamp_low, u4_timestamp_high); + } + + ps_app_ctxt->u4_sei_ave_params_present_flag = 0; + if(1 == ps_app_ctxt->u4_sei_ave_params_present_flag) + { + ps_app_ctxt->u4_ambient_illuminance = 1; + ps_app_ctxt->u4_ambient_light_x = 0; + ps_app_ctxt->u4_ambient_light_y = 0; + set_sei_ave_params(ps_app_ctxt, u4_timestamp_low, u4_timestamp_high); + } + + ps_app_ctxt->u4_sei_ccv_params_present_flag = 0; + if(1 == ps_app_ctxt->u4_sei_ccv_params_present_flag) + { + ps_app_ctxt->u4_ccv_cancel_flag = 0; + ps_app_ctxt->u4_ccv_persistence_flag = 1; + 
ps_app_ctxt->u4_ccv_primaries_present_flag = 1; + ps_app_ctxt->u4_ccv_min_luminance_value_present_flag = 1; + ps_app_ctxt->u4_ccv_max_luminance_value_present_flag = 1; + ps_app_ctxt->u4_ccv_avg_luminance_value_present_flag = 1; + ps_app_ctxt->u4_ccv_reserved_zero_2bits = 0; + for(i4_count = 0; i4_count < NUM_SEI_CCV_PRIMARIES; i4_count++) + { + ps_app_ctxt->ai4_ccv_primaries_x[i4_count] = 1; + ps_app_ctxt->ai4_ccv_primaries_y[i4_count] = 1; + } + ps_app_ctxt->u4_ccv_min_luminance_value = 1; + ps_app_ctxt->u4_ccv_max_luminance_value = 1; + ps_app_ctxt->u4_ccv_avg_luminance_value = 1; + set_sei_ccv_params(ps_app_ctxt, u4_timestamp_low, u4_timestamp_high); + } + + /******************************************************************************/ + /****************** Input Initialization + * **************************************/ + /******************************************************************************/ + + for(i = 0; i < DEFAULT_MAX_INPUT_BUFS; i++) + { + if(ps_app_ctxt->as_input_buf[i].u4_is_free) + { + pu1_buf = ps_app_ctxt->as_input_buf[i].pu1_buf; + pv_mb_info = ps_app_ctxt->as_input_buf[i].pv_mb_info; + pv_pic_info = ps_app_ctxt->as_input_buf[i].pv_pic_info; + ps_app_ctxt->as_input_buf[i].u4_is_free = 0; + break; + } + } + + if(i == DEFAULT_MAX_INPUT_BUFS) + { + printf("\n Unable to find a free input buffer!!"); + exit(0); + } + + ps_video_encode_ip->u4_size = sizeof(isvce_video_encode_ip_t); + ps_video_encode_op->u4_size = sizeof(isvce_video_encode_op_t); + + ps_video_encode_ip->pv_bufs = pu1_buf; + ps_video_encode_ip->pv_mb_info = pv_mb_info; + ps_video_encode_ip->pv_pic_info = pv_pic_info; + ps_video_encode_ip->u4_pic_info_type = ps_app_ctxt->u4_pic_info_type; + /* + * Since the buffers are used for reading, + * And after each row we have a stride we nned to calculate + * the luma size according to the stride + */ + ps_inp_raw_buf->e_color_fmt = ps_app_ctxt->e_inp_color_fmt; + + /* Initialize for 420SP */ + if(IV_YUV_420SP_UV == 
ps_app_ctxt->e_inp_color_fmt || + IV_YUV_420SP_VU == ps_app_ctxt->e_inp_color_fmt) + { + /*init luma buffer*/ + ps_inp_raw_buf->apv_bufs[0] = pu1_buf; + + /*Init chroma buffer*/ + pu1_buf += ps_app_ctxt->u4_strd * ps_app_ctxt->u4_ht; + ps_inp_raw_buf->apv_bufs[1] = pu1_buf; + + ps_inp_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd; + ps_inp_raw_buf->au4_wd[1] = ps_app_ctxt->u4_wd; + + ps_inp_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht; + ps_inp_raw_buf->au4_ht[1] = ps_app_ctxt->u4_ht / 2; + + ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd; + ps_inp_raw_buf->au4_strd[1] = ps_app_ctxt->u4_strd; + } + else if(IV_YUV_420P == ps_app_ctxt->e_inp_color_fmt) + { + /* init buffers */ + ps_inp_raw_buf->apv_bufs[0] = pu1_buf; + pu1_buf += (ps_app_ctxt->u4_wd) * ps_app_ctxt->u4_ht; + ps_inp_raw_buf->apv_bufs[1] = pu1_buf; + pu1_buf += (ps_app_ctxt->u4_wd >> 1) * (ps_app_ctxt->u4_ht >> 1); + ps_inp_raw_buf->apv_bufs[2] = pu1_buf; + + ps_inp_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd; + ps_inp_raw_buf->au4_wd[1] = ps_app_ctxt->u4_wd / 2; + ps_inp_raw_buf->au4_wd[2] = ps_app_ctxt->u4_wd / 2; + + ps_inp_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht; + ps_inp_raw_buf->au4_ht[1] = ps_app_ctxt->u4_ht / 2; + ps_inp_raw_buf->au4_ht[2] = ps_app_ctxt->u4_ht / 2; + + ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd; + ps_inp_raw_buf->au4_strd[1] = ps_app_ctxt->u4_strd / 2; + ps_inp_raw_buf->au4_strd[2] = ps_app_ctxt->u4_strd / 2; + } + else if(IV_YUV_422ILE == ps_app_ctxt->e_inp_color_fmt) + { + /*init luma buffer*/ + ps_inp_raw_buf->apv_bufs[0] = pu1_buf; + + ps_inp_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd * 2; + + ps_inp_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht; + + ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd * 2; + } + + /* + * Here we read input and other associated buffers. Regardless of success + * we will proceed from here as we will need extra calls to flush out + * input queue in encoder. Note that this is not necessary. 
You can just + * send encode calls till with valid output and recon buffers till the + * queue is flushed. + */ + while(1) + { + IV_STATUS_T mb_info_status = IV_SUCCESS, pic_info_status = IV_SUCCESS; + + status = read_input(ps_app_ctxt->fp_ip, ps_inp_raw_buf); + + if(ps_app_ctxt->u4_mb_info_type != 0) + { + mb_info_status = read_mb_info(ps_app_ctxt, pv_mb_info); + } + if(ps_app_ctxt->u4_pic_info_type != 0) + { + pic_info_status = read_pic_info(ps_app_ctxt, pv_pic_info); + } + if((IV_SUCCESS != status) || (IV_SUCCESS != mb_info_status) || + (IV_SUCCESS != pic_info_status)) + { + if(0 == ps_app_ctxt->u4_loopback) + { + is_last = 1; + break; + } + else + fseek(ps_app_ctxt->fp_ip, 0, SEEK_SET); + } + break; + } + + /******************************************************************************/ + /****************** Output Initialization + * *************************************/ + /******************************************************************************/ + for(i = 0; i < DEFAULT_MAX_OUTPUT_BUFS; i++) + { + if(ps_app_ctxt->as_output_buf[i].u4_is_free) + { + ps_app_ctxt->as_output_buf[i].u4_is_free = 0; + + ps_video_encode_ip->s_out_buf.pv_buf = ps_app_ctxt->as_output_buf[i].pu1_buf; + ps_video_encode_ip->s_out_buf.u4_bytes = 0; + ps_video_encode_ip->s_out_buf.u4_bufsize = + ps_app_ctxt->as_output_buf[i].u4_buf_size; + } + } + + /******************************************************************************/ + /****************** Recon Initialization + * **************************************/ + /******************************************************************************/ + init_raw_buf_descr(ps_app_ctxt, &s_recon_buf, ps_app_ctxt->as_recon_buf[0].pu1_buf, + ps_app_ctxt->e_recon_color_fmt); + + /******************************************************************************/ + /****************** Output Initialization + * *************************************/ + /******************************************************************************/ + { + UWORD8 
u1_num_layer_bufs_assigned = 0; + + for(i = 0; i < DEFAULT_MAX_NALU_INFO_BUFS; i++) + { + if(ps_app_ctxt->as_nalu_info_bufs[i].b_is_free) + { + ps_app_ctxt->as_nalu_info_bufs[i].b_is_free = false; + pi4_nalu_info_buf_ids[u1_num_layer_bufs_assigned] = i; + + s_video_encode_ip.ps_nalu_info_buf[u1_num_layer_bufs_assigned].pu1_buf = + ps_app_ctxt->as_nalu_info_bufs[i].pu1_buf; + s_video_encode_ip.ps_nalu_info_buf[u1_num_layer_bufs_assigned].u4_num_bytes = 0; + s_video_encode_ip.ps_nalu_info_buf[u1_num_layer_bufs_assigned].u4_buf_size = + ps_app_ctxt->as_nalu_info_bufs[i].u4_buf_size; + + u1_num_layer_bufs_assigned++; + + if(u1_num_layer_bufs_assigned >= ps_app_ctxt->u1_num_spatial_layers) + { + break; + } + } + } + } + + if(ps_app_ctxt->u4_psnr_enable) + init_raw_buf_descr(ps_app_ctxt, &s_inp_buf, ps_app_ctxt->pu1_psnr_buf, + ps_app_ctxt->e_inp_color_fmt); + + ps_video_encode_ip->s_recon_buf = s_recon_buf; + + /******************************************************************************/ + /************************* Un Initialized + * *************************************/ + /******************************************************************************/ + if(0 == ps_app_ctxt->u4_loopback) + { + /* If input file is read completely and loopback is not enabled, + * then exit the loop */ + if(feof(ps_app_ctxt->fp_ip)) + { + is_last = 1; + } + } + + /* If last frame, send input null to get back encoded frames */ + if(is_last == 1 || ((ps_app_ctxt->u4_max_num_frms) <= u4_timestamp_low)) + { + is_last = 1; + ps_inp_raw_buf->apv_bufs[0] = NULL; + ps_inp_raw_buf->apv_bufs[1] = NULL; + ps_inp_raw_buf->apv_bufs[2] = NULL; + } + + ps_video_encode_ip->u4_is_last = is_last; + ps_video_encode_ip->u4_mb_info_type = ps_app_ctxt->u4_mb_info_type; + ps_video_encode_ip->u4_pic_info_type = ps_app_ctxt->u4_pic_info_type; + + ps_video_encode_op->s_out_buf.pv_buf = NULL; + + for(i = 0; i < ps_app_ctxt->u1_num_spatial_layers; i++) + { + s_video_encode_op.ps_nalu_info_buf[i].pu1_buf = NULL; 
+ } + + ps_video_encode_ip->u4_timestamp_high = u4_timestamp_high; + ps_video_encode_ip->u4_timestamp_low = u4_timestamp_low; + + GETTIME(&ps_app_ctxt->enc_last_time); + + status = isvce_api_function(ps_enc, &s_video_encode_ip, &s_video_encode_op, &s_api_cmds); + + if(IV_SUCCESS != status) + { + printf("Encode Frame failed = 0x%x\n", s_video_encode_op.s_ive_op.u4_error_code); + break; + } + +#ifdef PROFILE_ENABLE + GETTIME(&curtime); + ELAPSEDTIME(ps_app_ctxt->enc_last_time, curtime, timetaken, frequency); + ps_app_ctxt->enc_last_time = curtime; + + { + WORD32 peak_avg, id; + u4_total_time += timetaken; + peak_window[peak_window_idx++] = timetaken; + if(peak_window_idx == PEAK_WINDOW_SIZE) peak_window_idx = 0; + peak_avg = 0; + for(id = 0; id < PEAK_WINDOW_SIZE; id++) + { + peak_avg += peak_window[id]; + } + peak_avg /= PEAK_WINDOW_SIZE; + if(peak_avg > peak_avg_max) peak_avg_max = peak_avg; + } +#endif + + /******************************************************************************/ + /****************** Writing Output + * ********************************************/ + /******************************************************************************/ + num_bytes = 0; + + if(ps_video_encode_op->output_present) + { + num_bytes = ps_video_encode_op->s_out_buf.u4_bytes; + pu1_buf = (UWORD8 *) ps_video_encode_op->s_out_buf.pv_buf; + + status = write_output(ps_app_ctxt->fp_op, pu1_buf, num_bytes); + + if(IV_SUCCESS != status) + { + printf("Error: Unable to write to output file\n"); + break; + } + } + + for(i = 0; i < DEFAULT_MAX_OUTPUT_BUFS; i++) + { + if(ps_app_ctxt->as_output_buf[i].pu1_buf == ps_video_encode_op->s_out_buf.pv_buf) + { + ps_app_ctxt->as_output_buf[i].u4_is_free = 1; + + break; + } + } + + if(ps_app_ctxt->u4_nalu_info_export_enable && s_video_encode_op.b_is_nalu_info_present) + { + for(i = 0; i < ps_app_ctxt->u1_num_spatial_layers; i++) + { + fprintf(ps_app_ctxt->fp_nalu_info, "%s", + s_video_encode_op.ps_nalu_info_buf[i].pu1_buf); + } + } + + 
if(ps_video_encode_op->s_inp_buf.apv_bufs[0]) + { + for(i = 0; i < DEFAULT_MAX_INPUT_BUFS; i++) + { + if(ps_app_ctxt->as_input_buf[i].pu1_buf == + ps_video_encode_op->s_inp_buf.apv_bufs[0]) + { + ps_app_ctxt->as_input_buf[i].u4_is_free = 1; + break; + } + } + } + + for(i = 0; i < ps_app_ctxt->u1_num_spatial_layers; i++) + { + ps_app_ctxt->as_nalu_info_bufs[pi4_nalu_info_buf_ids[i]].b_is_free = true; + } + + /********************************************************************** + * Print stats + **********************************************************************/ + { + UWORD8 u1_pic_type[][5] = {"IDR", "I", "P", "B", "NA"}; + WORD32 lookup_idx = 0; + + if(s_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_IDR_FRAME) + { + lookup_idx = 0; + } + else if(s_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_I_FRAME) + { + lookup_idx = 1; + } + else if(s_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_P_FRAME) + { + lookup_idx = 2; + } + else if(s_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_B_FRAME) + { + lookup_idx = 3; + } + else if(s_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_NA_FRAME) + { + lookup_idx = 4; + } + + if(s_video_encode_op.s_ive_op.u4_encoded_frame_type != IV_NA_FRAME) + { + ps_app_ctxt->u4_pics_cnt++; + ps_app_ctxt->avg_time = u4_total_time / ps_app_ctxt->u4_pics_cnt; + ps_app_ctxt->u4_total_bytes = ps_video_encode_op->s_out_buf.u4_bytes; + } + + if(ps_app_ctxt->u4_psnr_enable == 0) + { + printf("[%s] PicNum %4d ", u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt); + printf(" Bytes : %6d \t", ps_app_ctxt->u4_total_bytes); + printf( + "TimeTaken(microsec): %6d " + "AvgTime: %6d PeakAvgTimeMax: %6d\n", + timetaken, ps_app_ctxt->avg_time, peak_avg_max); + } + } + + /* For psnr computation, we need to read the correct input frame and + * compare with recon. The difficulty with doing it is that we only know + * that the frame number of recon is monotonically increasing. 
There + * may be gaps in the recon if any pre or post enc skip happens. There are + * 3 scenarios + * 1) A frame is encoded -> returns the pic type + * 2) A frame is not encoded -> Encoder is waiting, the frame may get + * encoded later + * 3) A frame is not encoded -> A post enc or pre enc skip happened. The + * frame is not going to be encoded + * + * The 1st and 2nd scenarios are easy, since we just need to increment + * recon cnt whenever we get a valid recon. This cnt can be used to + * sync the recon and input + * 3rd scenario in conjunction with 2nd will pose problems. Even if + * the returning frame is NA, we do not know whether we should increment the + * recon cnt or not because it can be case 2 or case 3. + * + * Solutions: + * ------------------------- + * One way to overcome this will be to return more information as part of + * the frame type. We can signal whether a frame was skipped as a part of the + * return frame type. + * This will not work. Since the output and recon are not in sync, we + * cannot use the current output frame type to determine if a recon + * is present currently or not. We need some other way to achieve this. + * + * Other way to do this which is cleaner and maintains the separation + * between recon and the output is to set the width [& height] of output + * recon buffer to be zero. Hence we will in effect be saying :"look there + * is a recon, but due to frame not being encoded it is having a width 0". + * To be more clear we need to make height also to be zero. + * + * But are we using these variables for allocating and deallocating + * the buffers somewhere? No we are not. The buffer gets re-init + * at every encode call + * + * Fixes + * ------------------------ + * Currently the recon buff width and height are set in the encoder. + * This will not work now because recon and input are not + * in sync. Hence a recon buff sent at time stamp x will get used to + * fill recon of input at time stamp y (x > y). 
If we reduced the + * frame dimensions in between, the recon buffer will not have enough + * space. Hence we need to set the with and height appropriatley inside + * lib itself. + */ + + if(ps_app_ctxt->u4_recon_enable || ps_app_ctxt->u4_chksum_enable || + ps_app_ctxt->u4_psnr_enable) + { + if(ps_video_encode_op->dump_recon) + { + s_recon_buf = ps_video_encode_op->s_recon_buf; + + /* Read input for psnr computuation */ + if(ps_app_ctxt->u4_psnr_enable) read_input(ps_app_ctxt->fp_psnr_ip, &s_inp_buf); + + /* if we have a valid recon buffer do the assocated tasks */ + if(s_recon_buf.au4_wd[0]) + { + /* Dump recon when enabled, and output bytes != 0 */ + if(ps_app_ctxt->u4_recon_enable) + { + status = write_recon(ps_app_ctxt->fp_recon, &s_recon_buf); + if(IV_SUCCESS != status) + { + printf("Error: Unable to write to recon file\n"); + break; + } + } + + if(ps_app_ctxt->u4_psnr_enable) + { + compute_psnr(ps_app_ctxt, &s_recon_buf, &s_inp_buf); + } + + if(ps_app_ctxt->u4_chksum_enable) + { + WORD32 comp, num_comp = 2; + + if(IV_YUV_420P == s_recon_buf.e_color_fmt) num_comp = 3; + + for(comp = 0; comp < num_comp; comp++) + { + UWORD8 au1_chksum[16]; + calc_md5_cksum((UWORD8 *) s_recon_buf.apv_bufs[comp], + s_recon_buf.au4_strd[comp], s_recon_buf.au4_wd[comp], + s_recon_buf.au4_ht[comp], au1_chksum); + fwrite(au1_chksum, sizeof(UWORD8), 16, ps_app_ctxt->fp_chksum); + } + } + } + } + } + + u4_timestamp_low++; + + /* Break if all the encoded frames are taken from encoder */ + if(1 == ps_video_encode_op->u4_is_last) + { + break; + } + } + + /* Pic count is 1 more than actual num frames encoded, because last call is to + * just get the output */ + ps_app_ctxt->u4_pics_cnt--; + + if(ps_app_ctxt->u4_psnr_enable) + { + print_average_psnr(ps_app_ctxt); + } + + /* house keeping operations */ + fclose(ps_app_ctxt->fp_ip); + fclose(ps_app_ctxt->fp_op); + if(1 == ps_app_ctxt->u4_recon_enable) + { + fclose(ps_app_ctxt->fp_recon); + } + if(1 == ps_app_ctxt->u4_nalu_info_export_enable) + 
{ + fclose(ps_app_ctxt->fp_nalu_info); + } + if(1 == ps_app_ctxt->u4_chksum_enable) + { + fclose(ps_app_ctxt->fp_chksum); + } + if(1 == ps_app_ctxt->u4_psnr_enable) + { + fclose(ps_app_ctxt->fp_psnr_ip); + } + + if(0 != ps_app_ctxt->u4_mb_info_type) + { + fclose(ps_app_ctxt->fp_mb_info); + } + if(ps_app_ctxt->u4_pic_info_type) + { + fclose(ps_app_ctxt->fp_pic_info); + } + + free_input(ps_app_ctxt); + free_output(ps_app_ctxt); + free_recon(ps_app_ctxt); + free_nalu_info_bufs(ps_app_ctxt); + + isvca_aligned_free(s_video_encode_ip.ps_nalu_info_buf); + isvca_aligned_free(s_video_encode_op.ps_nalu_info_buf); + + isvca_aligned_free(pi4_nalu_info_buf_ids); +} +/* Load the default rate-control settings for each of the u1_num_spatial_layers layers and reset u4_total_bytes. The per-layer arrays are indexed directly, so they must already be allocated; main calls alloc_rc_params immediately before this function. */ +void init_default_rc_params(app_ctxt_t *ps_app_ctxt) +{ + UWORD8 i; + for(i = 0; i < ps_app_ctxt->u1_num_spatial_layers; i++) + { + ps_app_ctxt->pu4_max_bitrate[i] = DEFAULT_MAX_BITRATE; + ps_app_ctxt->pu4_bitrate[i] = DEFAULT_BITRATE; + ps_app_ctxt->pu4_i_qp[i] = DEFAULT_I_QP; + ps_app_ctxt->pu4_p_qp[i] = DEFAULT_P_QP; + ps_app_ctxt->pu4_b_qp[i] = DEFAULT_B_QP; + ps_app_ctxt->pu4_i_qp_min[i] = DEFAULT_QP_MIN; + ps_app_ctxt->pu4_i_qp_max[i] = DEFAULT_QP_MAX; + ps_app_ctxt->pu4_p_qp_min[i] = DEFAULT_QP_MIN; + ps_app_ctxt->pu4_p_qp_max[i] = DEFAULT_QP_MAX; + ps_app_ctxt->pu4_b_qp_min[i] = DEFAULT_QP_MIN; + ps_app_ctxt->pu4_b_qp_max[i] = DEFAULT_QP_MAX; + ps_app_ctxt->pu4_vbv_buffer_delay[i] = 1000; + } + + ps_app_ctxt->u4_total_bytes = 0; +} + +/* Allocate memory to dynamic arrays holding RC information: every array receives one UWORD32 per spatial layer, requested with 16 as the first (alignment) argument of isvca_aligned_malloc */ +void alloc_rc_params(app_ctxt_t *ps_app_ctxt) +{ + UWORD8 u1_num_spatial_layers = ps_app_ctxt->u1_num_spatial_layers; + ps_app_ctxt->pu4_max_bitrate = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_bitrate = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_i_qp = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_p_qp = + (UWORD32 *) isvca_aligned_malloc(16,
sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_b_qp = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_i_qp_min = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_i_qp_max = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_p_qp_min = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_p_qp_max = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_b_qp_min = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_b_qp_max = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + ps_app_ctxt->pu4_vbv_buffer_delay = + (UWORD32 *) isvca_aligned_malloc(16, sizeof(UWORD32) * u1_num_spatial_layers); + + /* init_default_rc_params writes through all twelve arrays right after this call, so stop here if any allocation failed (same handling as allocate_output). A zero layer count is left alone: nothing is indexed in that case and a zero-size request may legitimately yield NULL. */ + if((u1_num_spatial_layers > 0) && + ((NULL == ps_app_ctxt->pu4_max_bitrate) || (NULL == ps_app_ctxt->pu4_bitrate) || + (NULL == ps_app_ctxt->pu4_i_qp) || (NULL == ps_app_ctxt->pu4_p_qp) || + (NULL == ps_app_ctxt->pu4_b_qp) || (NULL == ps_app_ctxt->pu4_i_qp_min) || + (NULL == ps_app_ctxt->pu4_i_qp_max) || (NULL == ps_app_ctxt->pu4_p_qp_min) || + (NULL == ps_app_ctxt->pu4_p_qp_max) || (NULL == ps_app_ctxt->pu4_b_qp_min) || + (NULL == ps_app_ctxt->pu4_b_qp_max) || + (NULL == ps_app_ctxt->pu4_vbv_buffer_delay))) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error), + "Allocation failed for rate control parameter arrays\n"); + codec_exit(ac_error); + } +} + +/* Free memory allocated to dynamic arrays holding RC information. The pointers in the context are not reset, so they must not be used after this call. */ +void free_rc_params(app_ctxt_t *ps_app_ctxt) +{ + isvca_aligned_free(ps_app_ctxt->pu4_max_bitrate); + isvca_aligned_free(ps_app_ctxt->pu4_bitrate); + isvca_aligned_free(ps_app_ctxt->pu4_i_qp); + isvca_aligned_free(ps_app_ctxt->pu4_p_qp); + isvca_aligned_free(ps_app_ctxt->pu4_b_qp); + isvca_aligned_free(ps_app_ctxt->pu4_i_qp_min); + isvca_aligned_free(ps_app_ctxt->pu4_i_qp_max); + isvca_aligned_free(ps_app_ctxt->pu4_p_qp_min); + isvca_aligned_free(ps_app_ctxt->pu4_p_qp_max); + isvca_aligned_free(ps_app_ctxt->pu4_b_qp_min); + isvca_aligned_free(ps_app_ctxt->pu4_b_qp_max); + isvca_aligned_free(ps_app_ctxt->pu4_vbv_buffer_delay); +} + +/*****************************************************************************/ +/* */ +/* Function Name : main */ +/* */ +/* Description : Application to demonstrate codec API */ +/* */ +/* */ +/* Inputs : argc - Number of arguments */ +/* argv[] - Arguments */ +/* Globals : */ +/* Processing : Shows how to use create, process, control and delete
*/ +/* */ +/* Outputs : Codec output in a file */ +/* Returns : */ +/* */ +/* Issues : Assumes both PROFILE_ENABLE to be */ +/* defined for multithread decode-display working */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 20 11 2013 100189 Initial Version */ +/*****************************************************************************/ +#ifdef IOS +int h264enc_main(char *homedir, char *documentdir, int screen_wd, int screen_ht) +#else +int main(int argc, char *argv[]) +#endif +{ + /* Config Parameters for Encoding */ + app_ctxt_t s_app_ctxt; + + /* error string */ + CHAR ac_error[2 * STRLENGTH]; + + /* config file name */ + CHAR ac_cfg_fname[STRLENGTH]; + + /* error status */ + IV_STATUS_T status = IV_SUCCESS; +#ifdef IOS + /* temp var */ + CHAR filename_with_path[STRLENGTH]; +#endif + WORD32 num_mem_recs; + iv_obj_t *ps_enc; + WORD32 i; + FILE *fp_cfg = NULL; + +#ifdef X86_MINGW + + /* For getting printfs without any delay in eclipse */ + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + +#endif + + init_default_params(&s_app_ctxt); + +#ifndef IOS + + /* Usage */ + if(argc < 2) + { + printf("Using enc.cfg as configuration file \n"); + strcpy(ac_cfg_fname, "enc.cfg"); + } + else if(argc == 2) + { + if(!strcmp(argv[1], "--help")) + { + print_usage(); + exit(-1); + } + strcpy(ac_cfg_fname, argv[1]); + } + +#else + strcpy(ac_cfg_fname, "test.cfg"); + +#endif + + /*************************************************************************/ + /* Parse arguments */ + /*************************************************************************/ + +#ifndef IOS + + /* Read command line arguments */ + if(argc > 2) + { + for(i = 1; i + 1 < argc; i += 2) + { + if(CONFIG == get_argument(argv[i])) + { + strcpy(ac_cfg_fname, argv[i + 1]); + if((fp_cfg = fopen(ac_cfg_fname, "r")) == NULL) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error); + } + 
read_cfg_file(&s_app_ctxt, fp_cfg); + fclose(fp_cfg); + } + else + { + parse_argument(&s_app_ctxt, argv[i], argv[i + 1]); + } + } + } + else + { + if((fp_cfg = fopen(ac_cfg_fname, "r")) == NULL) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error); + } + read_cfg_file(&s_app_ctxt, fp_cfg); + fclose(fp_cfg); + } + + alloc_rc_params(&s_app_ctxt); + init_default_rc_params(&s_app_ctxt); + + if(argc > 2) + { + for(i = 1; i + 1 < argc; i += 2) + { + if(CONFIG == get_argument(argv[i])) + { + strcpy(ac_cfg_fname, argv[i + 1]); + if((fp_cfg = fopen(ac_cfg_fname, "r")) == NULL) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error); + } + read_cfg_file_rc_params(&s_app_ctxt, fp_cfg); + fclose(fp_cfg); + } + else + { + parse_rc_argument(&s_app_ctxt, argv[i], argv[i + 1]); + } + } + } + else + { + if((fp_cfg = fopen(ac_cfg_fname, "r")) == NULL) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error); + } + read_cfg_file_rc_params(&s_app_ctxt, fp_cfg); + fclose(fp_cfg); + } + +#else + + sprintf(filename_with_path, "%s/%s", homedir, "enc.cfg"); + if((fp_cfg = fopen(filename_with_path, "r")) == NULL) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error); + } + read_cfg_file(&s_app_ctxt, fp_cfg); + fclose(fp_cfg); + +#endif + + validate_params(&s_app_ctxt); + + s_app_ctxt.u4_max_wd = MAX(s_app_ctxt.u4_max_wd, s_app_ctxt.u4_wd); + s_app_ctxt.u4_max_ht = MAX(s_app_ctxt.u4_max_ht, s_app_ctxt.u4_ht); + + /*************************************************************************/ + /* Getting Number of MemRecords */ + /*************************************************************************/ + { + isvce_num_mem_rec_ip_t s_num_mem_rec_ip; + isvce_num_mem_rec_op_t s_num_mem_rec_op; + + isvce_api_cmds_t 
s_api_cmds = {ISVCE_CMD_GET_NUM_MEM_REC, ISVCE_CMD_CT_NA}; + + s_num_mem_rec_ip.s_ive_ip.u4_size = sizeof(isvce_num_mem_rec_ip_t); + s_num_mem_rec_op.s_ive_op.u4_size = sizeof(isvce_num_mem_rec_op_t); + + status = isvce_api_function(0, &s_num_mem_rec_ip, &s_num_mem_rec_op, &s_api_cmds); + + if(status != IV_SUCCESS) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Get number of memory records failed = 0x%x\n", + s_num_mem_rec_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + + s_app_ctxt.u4_num_mem_rec = num_mem_recs = s_num_mem_rec_op.s_ive_op.u4_num_mem_rec; + } + + /* Allocate array to hold memory records */ + s_app_ctxt.ps_mem_rec = (iv_mem_rec_t *) malloc(num_mem_recs * sizeof(iv_mem_rec_t)); + if(NULL == s_app_ctxt.ps_mem_rec) + { + snprintf(ac_error, sizeof(ac_error) - 1, + "Unable to allocate memory for hold memory records: Size %d", + (WORD32) (num_mem_recs * sizeof(iv_mem_rec_t))); + codec_exit(ac_error); + } + + { + iv_mem_rec_t *ps_mem_rec; + ps_mem_rec = s_app_ctxt.ps_mem_rec; + for(i = 0; i < num_mem_recs; i++) + { + ps_mem_rec->u4_size = sizeof(iv_mem_rec_t); + ps_mem_rec->pv_base = NULL; + ps_mem_rec->u4_mem_size = 0; + ps_mem_rec->u4_mem_alignment = 0; + ps_mem_rec->e_mem_type = IV_NA_MEM_TYPE; + + ps_mem_rec++; + } + } + + /*************************************************************************/ + /* Getting MemRecords Attributes */ + /*************************************************************************/ + { + isvce_fill_mem_rec_ip_t s_fill_mem_rec_ip; + isvce_fill_mem_rec_op_t s_fill_mem_rec_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_FILL_NUM_MEM_REC, ISVCE_CMD_CT_NA}; + + s_fill_mem_rec_ip.s_ive_ip.u4_size = sizeof(isvce_fill_mem_rec_ip_t); + s_fill_mem_rec_op.s_ive_op.u4_size = sizeof(isvce_fill_mem_rec_op_t); + + s_fill_mem_rec_ip.s_ive_ip.ps_mem_rec = s_app_ctxt.ps_mem_rec; + s_fill_mem_rec_ip.s_ive_ip.u4_num_mem_rec = s_app_ctxt.u4_num_mem_rec; + s_fill_mem_rec_ip.s_ive_ip.u4_max_wd = s_app_ctxt.u4_max_wd; + 
s_fill_mem_rec_ip.s_ive_ip.u4_max_ht = s_app_ctxt.u4_max_ht; + s_fill_mem_rec_ip.u4_wd = s_app_ctxt.u4_wd; + s_fill_mem_rec_ip.u4_ht = s_app_ctxt.u4_ht; + s_fill_mem_rec_ip.s_ive_ip.u4_max_level = s_app_ctxt.u4_max_level; + s_fill_mem_rec_ip.s_ive_ip.e_color_format = DEFAULT_INP_COLOR_FMT; + s_fill_mem_rec_ip.s_ive_ip.u4_max_ref_cnt = DEFAULT_MAX_REF_FRM; + s_fill_mem_rec_ip.s_ive_ip.u4_max_reorder_cnt = DEFAULT_MAX_REORDER_FRM; + s_fill_mem_rec_ip.s_ive_ip.u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X; + s_fill_mem_rec_ip.s_ive_ip.u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y; + s_fill_mem_rec_ip.s_svc_inp_params.u1_num_temporal_layers = + s_app_ctxt.u1_num_temporal_layers; + s_fill_mem_rec_ip.s_svc_inp_params.u1_num_spatial_layers = s_app_ctxt.u1_num_spatial_layers; + s_fill_mem_rec_ip.s_svc_inp_params.d_spatial_res_ratio = s_app_ctxt.d_spatial_res_ratio; + + status = isvce_api_function(0, &s_fill_mem_rec_ip, &s_fill_mem_rec_op, &s_api_cmds); + + if(status != IV_SUCCESS) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Fill memory records failed = 0x%x\n", + s_fill_mem_rec_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + } + + /*************************************************************************/ + /* Allocating Memory for Mem Records */ + /*************************************************************************/ + { + WORD32 total_size; + iv_mem_rec_t *ps_mem_rec; + total_size = 0; + + ps_mem_rec = s_app_ctxt.ps_mem_rec; + for(i = 0; i < num_mem_recs; i++) + { + ps_mem_rec->pv_base = + isvca_aligned_malloc(ps_mem_rec->u4_mem_alignment, ps_mem_rec->u4_mem_size); + if(ps_mem_rec->pv_base == NULL) + { + snprintf(ac_error, sizeof(ac_error) - 1, + "Allocation failure for mem record id %d size %d\n", i, + ps_mem_rec->u4_mem_size); + codec_exit(ac_error); + } + total_size += ps_mem_rec->u4_mem_size; + + ps_mem_rec++; + } + printf("\nTotal memory for codec %d\n", total_size); + } + + /*************************************************************************/ 
+ /* Codec Instance Creation */ + /*************************************************************************/ + { + isvce_init_ip_t s_init_ip; + isvce_init_op_t s_init_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_INIT, ISVCE_CMD_CT_NA}; + + ps_enc = s_app_ctxt.ps_mem_rec[0].pv_base; + ps_enc->u4_size = sizeof(iv_obj_t); + ps_enc->pv_fxns = isvce_api_function; + s_app_ctxt.ps_enc = ps_enc; + + s_init_ip.pu4_max_bitrate = + isvca_aligned_malloc(16, sizeof(UWORD32) * s_app_ctxt.u1_num_spatial_layers); + + s_init_ip.s_ive_ip.u4_size = sizeof(isvce_init_ip_t); + s_init_op.s_ive_op.u4_size = sizeof(isvce_init_op_t); + + s_init_ip.s_ive_ip.u4_num_mem_rec = s_app_ctxt.u4_num_mem_rec; + s_init_ip.s_ive_ip.ps_mem_rec = s_app_ctxt.ps_mem_rec; + s_init_ip.s_ive_ip.u4_max_wd = s_app_ctxt.u4_max_wd; + s_init_ip.s_ive_ip.u4_max_ht = s_app_ctxt.u4_max_ht; + s_init_ip.u4_wd = s_app_ctxt.u4_wd; + s_init_ip.u4_ht = s_app_ctxt.u4_ht; + s_init_ip.s_ive_ip.u4_max_ref_cnt = DEFAULT_MAX_REF_FRM; + s_init_ip.s_ive_ip.u4_max_reorder_cnt = DEFAULT_MAX_REORDER_FRM; + s_init_ip.s_ive_ip.u4_max_level = s_app_ctxt.u4_max_level; + s_init_ip.s_ive_ip.e_inp_color_fmt = s_app_ctxt.e_inp_color_fmt; + + if(s_app_ctxt.u4_recon_enable || s_app_ctxt.u4_psnr_enable || s_app_ctxt.u4_chksum_enable) + { + s_init_ip.s_ive_ip.u4_enable_recon = 1; + } + else + { + s_init_ip.s_ive_ip.u4_enable_recon = 0; + } + + s_init_ip.b_nalu_info_export_enable = !!s_app_ctxt.u4_nalu_info_export_enable; + s_init_ip.s_ive_ip.e_recon_color_fmt = s_app_ctxt.e_recon_color_fmt; + s_init_ip.s_ive_ip.e_rc_mode = s_app_ctxt.u4_rc; + s_init_ip.s_ive_ip.u4_max_framerate = s_app_ctxt.u4_max_frame_rate; + for(i = 0; i < s_app_ctxt.u1_num_spatial_layers; i++) + { + s_init_ip.pu4_max_bitrate[i] = s_app_ctxt.pu4_max_bitrate[i]; + } + s_init_ip.s_ive_ip.u4_num_bframes = s_app_ctxt.u4_num_bframes; + s_init_ip.s_ive_ip.e_content_type = IV_PROGRESSIVE; + s_init_ip.s_ive_ip.u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X; + 
s_init_ip.s_ive_ip.u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y; + s_init_ip.s_ive_ip.e_slice_mode = s_app_ctxt.u4_slice_mode; + s_init_ip.s_ive_ip.u4_slice_param = s_app_ctxt.u4_slice_param; + s_init_ip.s_ive_ip.e_arch = s_app_ctxt.e_arch; + s_init_ip.s_ive_ip.e_soc = s_app_ctxt.e_soc; + s_init_ip.b_use_default_vui = s_app_ctxt.u4_use_default_vui; + + s_init_ip.s_svc_inp_params.u1_num_temporal_layers = s_app_ctxt.u1_num_temporal_layers; + s_init_ip.s_svc_inp_params.u1_num_spatial_layers = s_app_ctxt.u1_num_spatial_layers; + s_init_ip.s_svc_inp_params.d_spatial_res_ratio = s_app_ctxt.d_spatial_res_ratio; + + status = isvce_api_function(ps_enc, &s_init_ip, &s_init_op, &s_api_cmds); + + isvca_aligned_free(s_init_ip.pu4_max_bitrate); + + if(status != IV_SUCCESS) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Init memory records failed = 0x%x\n", + s_init_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + } + + /*************************************************************************/ + /* set processor details */ + /*************************************************************************/ + { + isvce_ctl_set_num_cores_ip_t s_ctl_set_num_cores_ip; + isvce_ctl_set_num_cores_op_t s_ctl_set_num_cores_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_SET_NUM_CORES}; + + s_ctl_set_num_cores_ip.s_ive_ip.u4_num_cores = s_app_ctxt.u4_num_cores; + s_ctl_set_num_cores_ip.s_ive_ip.u4_timestamp_high = 0; + s_ctl_set_num_cores_ip.s_ive_ip.u4_timestamp_low = 0; + s_ctl_set_num_cores_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_set_num_cores_ip_t); + + s_ctl_set_num_cores_op.s_ive_op.u4_size = sizeof(isvce_ctl_set_num_cores_op_t); + + status = isvce_api_function(ps_enc, (void *) &s_ctl_set_num_cores_ip, + (void *) &s_ctl_set_num_cores_op, &s_api_cmds); + if(status != IV_SUCCESS) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to set processor params = 0x%x\n", + s_ctl_set_num_cores_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + } + + 
/*************************************************************************/ + /* Get Codec Version */ + /*************************************************************************/ + { + isvce_ctl_getversioninfo_ip_t s_ctl_set_getversioninfo_ip; + isvce_ctl_getversioninfo_op_t s_ctl_set_getversioninfo_op; + + CHAR ac_version_string[STRLENGTH]; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_GETVERSION}; + + s_ctl_set_getversioninfo_ip.s_ive_ip.pu1_version = (UWORD8 *) ac_version_string; + s_ctl_set_getversioninfo_ip.s_ive_ip.u4_version_bufsize = sizeof(ac_version_string); + s_ctl_set_getversioninfo_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_getversioninfo_ip_t); + s_ctl_set_getversioninfo_op.s_ive_op.u4_size = sizeof(isvce_ctl_getversioninfo_op_t); + + status = isvce_api_function(ps_enc, (void *) &s_ctl_set_getversioninfo_ip, + (void *) &s_ctl_set_getversioninfo_op, &s_api_cmds); + if(status != IV_SUCCESS) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to get codec version = 0x%x\n", + s_ctl_set_getversioninfo_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + printf("CODEC VERSION %s\n", ac_version_string); + } + + /*************************************************************************/ + /* Get I/O Buffer Requirement */ + /*************************************************************************/ + { + isvce_ctl_getbufinfo_ip_t s_get_buf_info_ip; + isvce_ctl_getbufinfo_op_t s_get_buf_info_op; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_VIDEO_CTL, ISVCE_CMD_CTL_GETBUFINFO}; + + s_get_buf_info_ip.s_ive_ip.u4_size = sizeof(isvce_ctl_getbufinfo_ip_t); + s_get_buf_info_op.s_ive_op.u4_size = sizeof(isvce_ctl_getbufinfo_op_t); + + s_get_buf_info_ip.s_ive_ip.u4_max_ht = s_app_ctxt.u4_max_ht; + s_get_buf_info_ip.s_ive_ip.u4_max_wd = s_app_ctxt.u4_max_wd; + s_get_buf_info_ip.s_ive_ip.e_inp_color_fmt = s_app_ctxt.e_inp_color_fmt; + + status = isvce_api_function(ps_enc, &s_get_buf_info_ip, &s_get_buf_info_op, &s_api_cmds); + + if(status != 
IV_SUCCESS) + { + snprintf(ac_error, sizeof(ac_error) - 1, + "Unable to get I/O buffer requirements = 0x%x\n", + s_get_buf_info_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + s_app_ctxt.s_get_buf_info_op = s_get_buf_info_op; + } + + /*****************************************************************************/ + /* Add the following initializations based on the parameters in context */ + /*****************************************************************************/ + + /*****************************************************************************/ + /* Video control Set Frame dimensions */ + /*****************************************************************************/ + set_dimensions(&s_app_ctxt, 0, 0); + s_app_ctxt.u4_strd = s_app_ctxt.u4_wd; + + /*****************************************************************************/ + /* Video control Set Frame rates */ + /*****************************************************************************/ + set_frame_rate(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set IPE Params */ + /*****************************************************************************/ + set_ipe_params(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set Bitrate */ + /*****************************************************************************/ + set_bit_rate(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set QP */ + /*****************************************************************************/ + set_qp(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set AIR params */ + /*****************************************************************************/ + set_air_params(&s_app_ctxt, 0, 0); + + 
/*****************************************************************************/ + /* Video control Set VBV params */ + /*****************************************************************************/ + set_vbv_params(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set Motion estimation params */ + /*****************************************************************************/ + set_me_params(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set GOP params */ + /*****************************************************************************/ + set_gop_params(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set Deblock params */ + /*****************************************************************************/ + set_deblock_params(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set Profile params */ + /*****************************************************************************/ + set_profile_params(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set in Encode header mode */ + /*****************************************************************************/ + set_enc_mode(&s_app_ctxt, 0, 0, IVE_ENC_MODE_PICTURE); + + /*****************************************************************************/ + /* Video usability information */ + /*****************************************************************************/ + set_vui_params(&s_app_ctxt); + +#ifdef IOS + /* Correct file paths */ + sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_ip_fname); + strcpy(s_app_ctxt.ac_ip_fname, filename_with_path); + + sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_op_fname); + 
strcpy(s_app_ctxt.ac_op_fname, filename_with_path); + + sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_recon_fname); + strcpy(s_app_ctxt.ac_recon_fname, filename_with_path); + + sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_chksum_fname); + strcpy(s_app_ctxt.ac_chksum_fname, filename_with_path); + + sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_mb_info_fname); + strcpy(s_app_ctxt.ac_mb_info_fname, filename_with_path); + + sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_pic_info_fname); + strcpy(s_app_ctxt.ac_pic_info_fname, filename_with_path); +#endif + + get_enc_dimensions(&s_app_ctxt); + + /*************************************************************************/ + /* begin encoding */ + /*************************************************************************/ + + synchronous_encode(ps_enc, &s_app_ctxt); + + { + printf("\nEncoding Completed\n"); + printf("Summary\n"); + printf("Input filename : %s\n", s_app_ctxt.ac_ip_fname); + printf("Output filename : %s\n", s_app_ctxt.ac_op_fname); + printf("Output Width : %-4d\n", s_app_ctxt.u4_wd); + printf("Output Height : %-4d\n", s_app_ctxt.u4_ht); + + { + DOUBLE bytes_per_frame; + DOUBLE bytes_per_second; + WORD32 achieved_bitrate; + if(s_app_ctxt.u4_pics_cnt != 0) + { + bytes_per_frame = (s_app_ctxt.u4_total_bytes) / (s_app_ctxt.u4_pics_cnt); + } + else + { + bytes_per_frame = 0; + } + bytes_per_second = (bytes_per_frame * s_app_ctxt.u4_tgt_frame_rate); + achieved_bitrate = (WORD32) (bytes_per_second * 8); + printf("Target Bitrate (bps) : %-4d\n", s_app_ctxt.pu4_bitrate[0]); + printf("Achieved Bitrate (bps) : %-4d\n", achieved_bitrate); + } + + printf("Average Time per Frame : %-4d\n", s_app_ctxt.avg_time); + printf("Achieved FPS : %-4.2f\n", 1000000.0 / s_app_ctxt.avg_time); + } + + free_rc_params(&s_app_ctxt); + + /*************************************************************************/ + /* Close Codec Instance */ + 
/*************************************************************************/ + { + isvce_retrieve_mem_rec_ip_t s_retrieve_mem_ip; + isvce_retrieve_mem_rec_op_t s_retrieve_mem_op; + iv_mem_rec_t *ps_mem_rec; + + isvce_api_cmds_t s_api_cmds = {ISVCE_CMD_RETRIEVE_MEMREC, ISVCE_CMD_CT_NA}; + + s_retrieve_mem_ip.s_ive_ip.u4_size = sizeof(isvce_retrieve_mem_rec_ip_t); + s_retrieve_mem_op.s_ive_op.u4_size = sizeof(isvce_retrieve_mem_rec_op_t); + + s_retrieve_mem_ip.s_ive_ip.ps_mem_rec = s_app_ctxt.ps_mem_rec; + + status = isvce_api_function(ps_enc, &s_retrieve_mem_ip, &s_retrieve_mem_op, &s_api_cmds); + + if(status != IV_SUCCESS) + { + snprintf(ac_error, sizeof(ac_error) - 1, "Unable to retrieve memory records = 0x%x\n", + s_retrieve_mem_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + + /* Free memory records */ + ps_mem_rec = s_app_ctxt.ps_mem_rec; + for(i = 0; i < num_mem_recs; i++) + { + isvca_aligned_free(ps_mem_rec->pv_base); + ps_mem_rec++; + } + + free(s_app_ctxt.ps_mem_rec); + } + + return 0; +} + +#ifdef ANDROID_NDK +int raise(int a) +{ + printf("Divide by zero\n"); + return 0; +} +void __aeabi_assert(const char *assertion, const char *file, unsigned int line) {} +#endif diff --git a/test/svcenc/output.c b/test/svcenc/output.c new file mode 100644 index 0000000..d124a2d --- /dev/null +++ b/test/svcenc/output.c @@ -0,0 +1,95 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +#include +#include +#include +#include + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvce.h" +#include "app.h" + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ +#define PEAK_WINDOW_SIZE 8 +/*****************************************************************************/ +/* Macros */ +/*****************************************************************************/ +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ +IV_STATUS_T write_output(FILE *fp, UWORD8 *pu1_buf, WORD32 num_bytes) +{ + WORD32 bytes; +/* Write num_bytes bytes from pu1_buf to fp. A short fwrite count returns IV_FAIL without flushing; otherwise the stream is flushed and IV_SUCCESS is returned. */ + bytes = (WORD32) fwrite(pu1_buf, sizeof(UWORD8), num_bytes, fp); + if(bytes != num_bytes) return IV_FAIL; + fflush(fp); + + return IV_SUCCESS; +} +/* Allocate the application's output buffers. The count is u4_min_out_bufs from s_get_buf_info_op, raised to at least DEFAULT_NUM_OUTPUT_BUFS and capped at DEFAULT_MAX_OUTPUT_BUFS; every buffer has size au4_min_out_buf_size[0] and is marked free. A failed allocation is reported through codec_exit. */ +void allocate_output(app_ctxt_t *ps_app_ctxt) +{ + WORD32 num_bufs; + WORD32 i; + UWORD8 *pu1_buf; + WORD32 buf_size; + num_bufs = + MAX(DEFAULT_NUM_OUTPUT_BUFS, ps_app_ctxt->s_get_buf_info_op.s_ive_op.u4_min_out_bufs); + num_bufs = MIN(DEFAULT_MAX_OUTPUT_BUFS, num_bufs); + + buf_size = ps_app_ctxt->s_get_buf_info_op.s_ive_op.au4_min_out_buf_size[0]; + /* Memset the output buffer array to set is_free to 0 */ + memset(ps_app_ctxt->as_output_buf, 0, sizeof(ps_app_ctxt->as_output_buf)); + + for(i = 0; i < num_bufs; i++) + { + pu1_buf = (UWORD8 *) isvca_aligned_malloc(16, buf_size); + if(NULL == pu1_buf) + { + CHAR ac_error[2 * STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, + "Allocation failed for output buffer of size %d\n", buf_size);
+ codec_exit(ac_error); + } + ps_app_ctxt->as_output_buf[i].pu1_buf = pu1_buf; + ps_app_ctxt->as_output_buf[i].u4_buf_size = buf_size; + ps_app_ctxt->as_output_buf[i].u4_is_free = 1; + } +} +/* Release the buffers obtained by allocate_output. The buffer count is recomputed with the same MAX/MIN clamp used there, so both functions walk the same entries of as_output_buf. The stored pointers are not cleared. */ +void free_output(app_ctxt_t *ps_app_ctxt) +{ + WORD32 num_bufs; + WORD32 i; + + num_bufs = + MAX(DEFAULT_NUM_OUTPUT_BUFS, ps_app_ctxt->s_get_buf_info_op.s_ive_op.u4_min_out_bufs); + num_bufs = MIN(DEFAULT_MAX_OUTPUT_BUFS, num_bufs); + for(i = 0; i < num_bufs; i++) + { + isvca_aligned_free(ps_app_ctxt->as_output_buf[i].pu1_buf); + } +} diff --git a/test/svcenc/psnr.c b/test/svcenc/psnr.c new file mode 100644 index 0000000..f207d23 --- /dev/null +++ b/test/svcenc/psnr.c @@ -0,0 +1,245 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore + */ + +#include +#include +#include +#include + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvce.h" +#include "app.h" +#include "psnr.h" + +/*****************************************************************************/ +/* */ +/* Function Name : init_psnr */ +/* */ +/* Description : Initialize PSNR for the Y, U, V component */ +/* */ +/* Inputs : ps_app_ctxt - application context whose PSNR state is reset */ +/* */ +/* Globals : */ +/* */ +/* Processing : Zeroes adbl_psnr[0..2] and u4_psnr_cnt */ +/* */ +/* Outputs : ps_app_ctxt->adbl_psnr[], ps_app_ctxt->u4_psnr_cnt */ +/* */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 12 2005 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void init_psnr(app_ctxt_t *ps_app_ctxt) +{ + ps_app_ctxt->adbl_psnr[0] = 0; + ps_app_ctxt->adbl_psnr[1] = 0; + ps_app_ctxt->adbl_psnr[2] = 0; + ps_app_ctxt->u4_psnr_cnt = 0; +} + +/*****************************************************************************/ +/* */ +/* Function Name : compute_psnr */ +/* */ +/* Description : Computes the PSNR for the Y, U, V component */ +/* */ +/* Inputs : */ +/* */ +/* Globals : */ +/* */ +/* Processing : */ +/* */ +/* Outputs : */ +/* */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 12 2005 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void compute_psnr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_buf1, iv_raw_buf_t *ps_buf2) +{ + WORD32 i, j; + WORD32 comp; + DOUBLE df_psnr[3]; + WORD32 wd, ht, strd1, strd2; + UWORD8 *pu1_buf1, *pu1_buf2; + WORD32 incr1, incr2; + + printf("\nPicNum %4d\t ", ps_app_ctxt->u4_psnr_cnt); + + for(comp = 0; comp < 3; comp++) + { + df_psnr[comp] = 0; + pu1_buf1 = (UWORD8 *) ps_buf1->apv_bufs[comp]; + pu1_buf2 = (UWORD8 *) ps_buf2->apv_bufs[comp]; + wd = ps_buf1->au4_wd[comp]; + ht = ps_buf1->au4_ht[comp]; +
strd1 = ps_buf1->au4_strd[comp] - ps_buf1->au4_wd[comp]; + strd2 = ps_buf2->au4_strd[comp] - ps_buf2->au4_wd[comp]; + incr1 = 1; + incr2 = 1; + + if((IV_YUV_420SP_UV == ps_buf1->e_color_fmt) || (IV_YUV_420SP_VU == ps_buf1->e_color_fmt)) + { + switch(comp) + { + case 0: + pu1_buf1 = ps_buf1->apv_bufs[0]; + break; + case 1: + if(IV_YUV_420SP_UV == ps_buf1->e_color_fmt) + pu1_buf1 = (UWORD8 *) ps_buf1->apv_bufs[1]; + else + pu1_buf1 = (UWORD8 *) ps_buf1->apv_bufs[1] + 1; + incr1 = 2; + wd = ps_buf1->au4_wd[0] >> 1; + ht = ps_buf1->au4_ht[0] >> 1; + break; + case 2: + if(IV_YUV_420SP_UV == ps_buf1->e_color_fmt) + pu1_buf1 = (UWORD8 *) ps_buf1->apv_bufs[1] + 1; + else + pu1_buf1 = ps_buf1->apv_bufs[1]; + incr1 = 2; + wd = ps_buf1->au4_wd[0] >> 1; + ht = ps_buf1->au4_ht[0] >> 1; + strd1 = ps_buf1->au4_strd[1] - ps_buf1->au4_wd[1]; + break; + } + } + if((IV_YUV_420SP_UV == ps_buf2->e_color_fmt) || (IV_YUV_420SP_VU == ps_buf2->e_color_fmt)) + { + switch(comp) + { + case 0: + pu1_buf2 = ps_buf2->apv_bufs[0]; + break; + case 1: + if(IV_YUV_420SP_UV == ps_buf2->e_color_fmt) + pu1_buf2 = ps_buf2->apv_bufs[1]; + else + pu1_buf2 = (UWORD8 *) ps_buf2->apv_bufs[1] + 1; + incr2 = 2; + wd = ps_buf2->au4_wd[0] >> 1; + ht = ps_buf2->au4_ht[0] >> 1; + + break; + case 2: + if(IV_YUV_420SP_UV == ps_buf2->e_color_fmt) + pu1_buf2 = (UWORD8 *) ps_buf2->apv_bufs[1] + 1; + else + pu1_buf2 = ps_buf2->apv_bufs[1]; + incr2 = 2; + wd = ps_buf2->au4_wd[0] >> 1; + ht = ps_buf2->au4_ht[0] >> 1; + strd2 = ps_buf2->au4_strd[1] - ps_buf2->au4_wd[1]; + + break; + } + } + + for(i = 0; i < ht; i++) + { + for(j = 0; j < wd; j++) + { + WORD32 diff; + diff = (*pu1_buf1 - *pu1_buf2); + pu1_buf1 += incr1; + pu1_buf2 += incr2; + df_psnr[comp] += diff * diff; + } + pu1_buf1 += strd1; + pu1_buf2 += strd2; + } + df_psnr[comp] /= (wd * ht); + if(df_psnr[comp]) + df_psnr[comp] = 20 * log10(255 / sqrt(df_psnr[comp])); + else + df_psnr[comp] = 100; + + ps_app_ctxt->adbl_psnr[comp] += df_psnr[comp]; + switch(comp) + { 
+ case 0: + printf("Y :"); + break; + case 1: + printf("U :"); + break; + case 2: + printf("V :"); + break; + default: + break; + } + printf("%2.2f\t", df_psnr[comp]); + } + + ps_app_ctxt->u4_psnr_cnt++; +} + +/*****************************************************************************/ +/* */ +/* Function Name : print_average_psnr */ +/* */ +/* Description : Computes the average PSNR for the Y, U, V component */ +/* */ +/* Inputs : */ +/* */ +/* Globals : */ +/* */ +/* Processing : */ +/* */ +/* Outputs : */ +/* */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 12 2005 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void print_average_psnr(app_ctxt_t *ps_app_ctxt) +{ + printf("\n"); + + printf("Avg PSNR Y : %-2.2f\n", + (ps_app_ctxt->adbl_psnr[0] / ps_app_ctxt->u4_psnr_cnt)); + printf("Avg PSNR U : %-2.2f\n", + (ps_app_ctxt->adbl_psnr[1] / ps_app_ctxt->u4_psnr_cnt)); + printf("Avg PSNR V : %-2.2f\n", + (ps_app_ctxt->adbl_psnr[2] / ps_app_ctxt->u4_psnr_cnt)); +} diff --git a/test/svcenc/psnr.h b/test/svcenc/psnr.h new file mode 100644 index 0000000..51082d2 --- /dev/null +++ b/test/svcenc/psnr.h @@ -0,0 +1,58 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ +/****************************************************************************/ +/* */ +/* File Name : psnr.h */ +/* */ +/* Description : Contains functions for psnr computation */ +/* */ +/* List of Functions : */ +/* compute_psnr */ +/* print_average_psnr */ +/* Issues / Problems : */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes) */ +/* */ +/****************************************************************************/ +#ifndef _SVCE_APP_PSNR_H_ +#define _SVCE_APP_PSNR_H_ + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ +void init_psnr(app_ctxt_t *ps_app_ctxt); + +void compute_psnr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_buf1, iv_raw_buf_t *ps_buf2); + +void print_average_psnr(app_ctxt_t *ps_app_ctxt); + +#if COMPUTE_PSNR + +#define GET_AVERAGE_PSNR_Y(print) print_average_psnr(print) + +#else /* COMPUTE_PSNR */ + +#define GET_AVERAGE_PSNR_Y(print) 0 + +#endif /* COMPUTE_PSNR */ + +#endif diff --git a/test/svcenc/recon.c b/test/svcenc/recon.c new file mode 100644 index 0000000..c43d39e --- /dev/null +++ b/test/svcenc/recon.c @@ -0,0 +1,215 @@ +/****************************************************************************** + * + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ + +#include +#include +#include +#include +/* User include files */ + +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "isvce.h" +#include "app.h" + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Macros */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + +IV_STATUS_T write_recon(FILE *fp, iv_raw_buf_t *ps_raw_buf) +{ + WORD32 bytes; + WORD32 wd, ht; + UWORD8 *pu1_buf; + WORD32 i; + WORD32 comp; + WORD32 num_comp; + + num_comp = 2; + if(IV_YUV_420P == ps_raw_buf->e_color_fmt) num_comp = 3; + + for(comp = 0; comp < num_comp; comp++) + { + wd = ps_raw_buf->au4_wd[comp]; + ht = ps_raw_buf->au4_ht[comp]; + pu1_buf = ps_raw_buf->apv_bufs[comp]; + for(i = 0; i < ht; i++) + { + bytes = (WORD32) fwrite(pu1_buf, 
sizeof(UWORD8), wd, fp); + if(bytes != wd) + { + return (IV_FAIL); + } + pu1_buf += wd; + } + } + + fflush(fp); + return IV_SUCCESS; +} + +void allocate_recon(app_ctxt_t *ps_app_ctxt) +{ + WORD32 num_bufs; + WORD32 pic_size; + WORD32 luma_size; + WORD32 chroma_size; + WORD32 i; + UWORD8 *pu1_buf; + + num_bufs = DEFAULT_NUM_RECON_BUFS; + + assert(ps_app_ctxt->s_get_buf_info_op.u4_rec_comp_cnt == 3); + + /* Size of buffer for YUV420/420SP */ + luma_size = ps_app_ctxt->s_get_buf_info_op.au4_min_rec_buf_size[0]; + chroma_size = ps_app_ctxt->s_get_buf_info_op.au4_min_rec_buf_size[1] + + ps_app_ctxt->s_get_buf_info_op.au4_min_rec_buf_size[2]; + pic_size = luma_size + chroma_size; + + for(i = 0; i < num_bufs; i++) + { + pu1_buf = (UWORD8 *) isvca_aligned_malloc(16, pic_size); + + if(NULL == pu1_buf) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, + "Allocation failed for recon buffer of size %d\n", pic_size); + codec_exit(ac_error); + } + + ps_app_ctxt->as_recon_buf[i].pu1_buf = pu1_buf; + ps_app_ctxt->as_recon_buf[i].u4_buf_size = pic_size; + ps_app_ctxt->as_recon_buf[i].u4_is_free = 1; + } + + if(ps_app_ctxt->u4_psnr_enable) + { + pu1_buf = (UWORD8 *) isvca_aligned_malloc(16, pic_size); + + if(NULL == pu1_buf) + { + CHAR ac_error[STRLENGTH]; + snprintf(ac_error, sizeof(ac_error) - 1, + "Allocation failed for recon buffer of size %d\n", pic_size); + codec_exit(ac_error); + } + + ps_app_ctxt->pu1_psnr_buf = pu1_buf; + ps_app_ctxt->u4_psnr_buf_size = pic_size; + } +} + +void free_recon(app_ctxt_t *ps_app_ctxt) +{ + WORD32 num_bufs; + WORD32 i; + + num_bufs = DEFAULT_NUM_RECON_BUFS; + + for(i = 0; i < num_bufs; i++) + { + isvca_aligned_free(ps_app_ctxt->as_recon_buf[i].pu1_buf); + } + + if(ps_app_ctxt->u4_psnr_enable) + { + isvca_aligned_free(ps_app_ctxt->pu1_psnr_buf); + } +} + +void init_raw_buf_descr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_raw_buf, UWORD8 *pu1_buf, + IV_COLOR_FORMAT_T e_color_fmt) +{ + WORD32 luma_size; + WORD32 
au4_chroma_sizes[2]; + + assert(IV_YUV_420P == e_color_fmt); + + luma_size = ps_app_ctxt->s_get_buf_info_op.au4_min_rec_buf_size[0]; + au4_chroma_sizes[0] = ps_app_ctxt->s_get_buf_info_op.au4_min_rec_buf_size[1]; + au4_chroma_sizes[1] = ps_app_ctxt->s_get_buf_info_op.au4_min_rec_buf_size[2]; + + ps_raw_buf->apv_bufs[0] = pu1_buf; + pu1_buf += luma_size; + + ps_raw_buf->apv_bufs[1] = pu1_buf; + pu1_buf += au4_chroma_sizes[0]; + + ps_raw_buf->apv_bufs[2] = NULL; + if(IV_YUV_420P == e_color_fmt) + { + ps_raw_buf->apv_bufs[2] = pu1_buf; + } + + ps_raw_buf->e_color_fmt = e_color_fmt; + ps_raw_buf->au4_wd[0] = ps_app_ctxt->u4_enc_wd; + ps_raw_buf->au4_ht[0] = ps_app_ctxt->u4_enc_ht; + ps_raw_buf->au4_strd[0] = ps_app_ctxt->u4_enc_wd; + + /* Initialize for 420SP */ + { + ps_raw_buf->au4_wd[1] = ps_app_ctxt->u4_enc_wd; + ps_raw_buf->au4_wd[2] = 0; + + ps_raw_buf->au4_ht[1] = ps_app_ctxt->u4_enc_ht / 2; + ps_raw_buf->au4_ht[2] = 0; + + ps_raw_buf->au4_strd[1] = ps_app_ctxt->u4_enc_wd; + ps_raw_buf->au4_strd[2] = 0; + } + + if(IV_YUV_420P == e_color_fmt) + { + ps_raw_buf->au4_wd[1] = ps_app_ctxt->u4_enc_wd / 2; + ps_raw_buf->au4_wd[2] = ps_app_ctxt->u4_enc_wd / 2; + + ps_raw_buf->au4_ht[1] = ps_app_ctxt->u4_enc_ht / 2; + ps_raw_buf->au4_ht[2] = ps_app_ctxt->u4_enc_ht / 2; + + ps_raw_buf->au4_strd[1] = ps_app_ctxt->u4_enc_wd / 2; + ps_raw_buf->au4_strd[2] = ps_app_ctxt->u4_enc_wd / 2; + } + /* If stride is not initialized, then use width as stride */ + if(0 == ps_raw_buf->au4_strd[0]) + { + ps_raw_buf->au4_strd[0] = ps_raw_buf->au4_wd[0]; + ps_raw_buf->au4_strd[1] = ps_raw_buf->au4_wd[1]; + ps_raw_buf->au4_strd[2] = ps_raw_buf->au4_wd[2]; + } + + ps_raw_buf->u4_size = sizeof(iv_raw_buf_t); +} diff --git a/test/svcenc/svcenc.cmake b/test/svcenc/svcenc.cmake new file mode 100644 index 0000000..0ea599f --- /dev/null +++ b/test/svcenc/svcenc.cmake @@ -0,0 +1,12 @@ +list( + APPEND + SVCENC_SRCS + "${AVC_ROOT}/test/svcenc/main.c" + "${AVC_ROOT}/test/svcenc/input.c" + 
"${AVC_ROOT}/test/svcenc/output.c" + "${AVC_ROOT}/test/svcenc/psnr.c" + "${AVC_ROOT}/test/svcenc/recon.c") + +libavc_add_executable(svcenc libsvcenc SOURCES ${SVCENC_SRCS} INCLUDES + "${AVC_ROOT}/test/svcenc/") +target_compile_definitions(svcenc PRIVATE PROFILE_ENABLE MD5_DISABLE)